提交 91735832 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma fixes from Doug Ledford:
 "Second -rc update for 4.14.

  Both Mellanox and Intel had a series of -rc fixes that landed this
  week. The Mellanox bunch is spread throughout the stack and not just
  in their driver, where as the Intel bunch was mostly in the hfi1
  driver. And, several of the fixes in the hfi1 driver were more than
  just simple 5 line fixes. As a result, the hfi1 driver fixes has a
  sizable LOC count.

  Everything else is as one would expect in an RC cycle in terms of LOC
  count. One item that might jump out and make you think "That's not an
  rc item" is the fix that corrects a typo. But, that change fixes a
  typo in a user visible API that was just added in this merge window,
  so if we fix it now, we can fix it. If we don't, the typo is in the
  API forever. Another that might not appear to be a fix at first glance
  is the Simplify mlx5_ib_cont_pages patch, but the simplification
  allows them to fix a bug in the existing function whenever the length
  of an SGE exceeded page size. We also had to revert one patch from the
  merge window that was wrong.

  Summary:

   - a few core fixes
   - a few ipoib fixes
   - a few mlx5 fixes
   - a 7-patch hfi1 related series"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  IB/hfi1: Unsuccessful PCIe caps tuning should not fail driver load
  IB/hfi1: On error, fix use after free during user context setup
  Revert "IB/ipoib: Update broadcast object if PKey value was changed in index 0"
  IB/hfi1: Return correct value in general interrupt handler
  IB/hfi1: Check eeprom config partition validity
  IB/hfi1: Only reset QSFP after link up and turn off AOC TX
  IB/hfi1: Turn off AOC TX after offline substates
  IB/mlx5: Fix NULL deference on mlx5_ib_update_xlt failure
  IB/mlx5: Simplify mlx5_ib_cont_pages
  IB/ipoib: Fix inconsistency with free_netdev and free_rdma_netdev
  IB/ipoib: Fix sysfs Pkey create<->remove possible deadlock
  IB: Correct MR length field to be 64-bit
  IB/core: Fix qp_sec use after free access
  IB/core: Fix typo in the name of the tag-matching cap struct
......@@ -432,8 +432,10 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
atomic_set(&qp->qp_sec->error_list_count, 0);
init_completion(&qp->qp_sec->error_complete);
ret = security_ib_alloc_security(&qp->qp_sec->security);
if (ret)
if (ret) {
kfree(qp->qp_sec);
qp->qp_sec = NULL;
}
return ret;
}
......
......@@ -3869,15 +3869,15 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.raw_packet_caps = attr.raw_packet_caps;
resp.response_length += sizeof(resp.raw_packet_caps);
if (ucore->outlen < resp.response_length + sizeof(resp.xrq_caps))
if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps))
goto end;
resp.xrq_caps.max_rndv_hdr_size = attr.xrq_caps.max_rndv_hdr_size;
resp.xrq_caps.max_num_tags = attr.xrq_caps.max_num_tags;
resp.xrq_caps.max_ops = attr.xrq_caps.max_ops;
resp.xrq_caps.max_sge = attr.xrq_caps.max_sge;
resp.xrq_caps.flags = attr.xrq_caps.flags;
resp.response_length += sizeof(resp.xrq_caps);
resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size;
resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags;
resp.tm_caps.max_ops = attr.tm_caps.max_ops;
resp.tm_caps.max_sge = attr.tm_caps.max_sge;
resp.tm_caps.flags = attr.tm_caps.flags;
resp.response_length += sizeof(resp.tm_caps);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
......
......@@ -1066,6 +1066,8 @@ static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
static int thermal_init(struct hfi1_devdata *dd);
static void update_statusp(struct hfi1_pportdata *ppd, u32 state);
static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
int msecs);
static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
......@@ -8238,6 +8240,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
u64 regs[CCE_NUM_INT_CSRS];
u32 bit;
int i;
irqreturn_t handled = IRQ_NONE;
this_cpu_inc(*dd->int_counter);
......@@ -8258,9 +8261,10 @@ static irqreturn_t general_interrupt(int irq, void *data)
for_each_set_bit(bit, (unsigned long *)&regs[0],
CCE_NUM_INT_CSRS * 64) {
is_interrupt(dd, bit);
handled = IRQ_HANDLED;
}
return IRQ_HANDLED;
return handled;
}
static irqreturn_t sdma_interrupt(int irq, void *data)
......@@ -9413,7 +9417,7 @@ static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
}
void reset_qsfp(struct hfi1_pportdata *ppd)
int reset_qsfp(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
u64 mask, qsfp_mask;
......@@ -9443,6 +9447,13 @@ void reset_qsfp(struct hfi1_pportdata *ppd)
* for alarms and warnings
*/
set_qsfp_int_n(ppd, 1);
/*
* After the reset, AOC transmitters are enabled by default. They need
* to be turned off to complete the QSFP setup before they can be
* enabled again.
*/
return set_qsfp_tx(ppd, 0);
}
static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
......@@ -10305,6 +10316,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
{
struct hfi1_devdata *dd = ppd->dd;
u32 previous_state;
int offline_state_ret;
int ret;
update_lcb_cache(dd);
......@@ -10326,28 +10338,11 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
/*
* Wait for offline transition. It can take a while for
* the link to go down.
*/
ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
if (ret < 0)
return ret;
/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
/* make sure the logical state is also down */
ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
if (ret)
force_logical_link_state_down(ppd);
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
offline_state_ret = wait_phys_link_offline_substates(ppd, 10000);
if (offline_state_ret < 0)
return offline_state_ret;
/* Disabling AOC transmitters */
if (ppd->port_type == PORT_TYPE_QSFP &&
ppd->qsfp_info.limiting_active &&
qsfp_mod_present(ppd)) {
......@@ -10364,6 +10359,30 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
}
}
/*
* Wait for the offline.Quiet transition if it hasn't happened yet. It
* can take a while for the link to go down.
*/
if (offline_state_ret != PLS_OFFLINE_QUIET) {
ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 30000);
if (ret < 0)
return ret;
}
/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
/* make sure the logical state is also down */
ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
if (ret)
force_logical_link_state_down(ppd);
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
/*
* The LNI has a mandatory wait time after the physical state
* moves to Offline.Quiet. The wait time may be different
......@@ -10396,6 +10415,9 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
/* went down while attempting link up */
check_lni_states(ppd);
/* The QSFP doesn't need to be reset on LNI failure */
ppd->qsfp_info.reset_needed = 0;
}
/* the active link width (downgrade) is 0 on link down */
......@@ -12804,6 +12826,39 @@ static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
return 0;
}
/*
* wait_phys_link_offline_quiet_substates - wait for any offline substate
* @ppd: port device
* @msecs: the number of milliseconds to wait
*
* Wait up to msecs milliseconds for any offline physical link
* state change to occur.
* Returns 0 if at least one state is reached, otherwise -ETIMEDOUT.
*/
static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
int msecs)
{
u32 read_state;
unsigned long timeout;
timeout = jiffies + msecs_to_jiffies(msecs);
while (1) {
read_state = read_physical_state(ppd->dd);
if ((read_state & 0xF0) == PLS_OFFLINE)
break;
if (time_after(jiffies, timeout)) {
dd_dev_err(ppd->dd,
"timeout waiting for phy link offline.quiet substates. Read state 0x%x, %dms\n",
read_state, msecs);
return -ETIMEDOUT;
}
usleep_range(1950, 2050); /* sleep 2ms-ish */
}
log_state_transition(ppd, read_state);
return read_state;
}
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
......
......@@ -204,6 +204,7 @@
#define PLS_OFFLINE_READY_TO_QUIET_LT 0x92
#define PLS_OFFLINE_REPORT_FAILURE 0x93
#define PLS_OFFLINE_READY_TO_QUIET_BCC 0x94
#define PLS_OFFLINE_QUIET_DURATION 0x95
#define PLS_POLLING 0x20
#define PLS_POLLING_QUIET 0x20
#define PLS_POLLING_ACTIVE 0x21
......@@ -722,7 +723,7 @@ void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work);
void handle_start_link(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
void reset_qsfp(struct hfi1_pportdata *ppd);
int reset_qsfp(struct hfi1_pportdata *ppd);
void qsfp_event(struct work_struct *work);
void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
int send_idle_sma(struct hfi1_devdata *dd, u64 message);
......
......@@ -204,7 +204,10 @@ int eprom_init(struct hfi1_devdata *dd)
return ret;
}
/* magic character sequence that trails an image */
/* magic character sequence that begins an image */
#define IMAGE_START_MAGIC "APO="
/* magic character sequence that might trail an image */
#define IMAGE_TRAIL_MAGIC "egamiAPO"
/* EPROM file types */
......@@ -250,6 +253,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
{
void *buffer;
void *p;
u32 length;
int ret;
buffer = kmalloc(P1_SIZE, GFP_KERNEL);
......@@ -262,15 +266,21 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
return ret;
}
/* scan for image magic that may trail the actual data */
p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
if (!p) {
/* config partition is valid only if it starts with IMAGE_START_MAGIC */
if (memcmp(buffer, IMAGE_START_MAGIC, strlen(IMAGE_START_MAGIC))) {
kfree(buffer);
return -ENOENT;
}
/* scan for image magic that may trail the actual data */
p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
if (p)
length = p - buffer;
else
length = P1_SIZE;
*data = buffer;
*size = p - buffer;
*size = length;
return 0;
}
......
......@@ -930,15 +930,8 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
switch (ret) {
case 0:
ret = setup_base_ctxt(fd, uctxt);
if (uctxt->subctxt_cnt) {
/*
* Base context is done (successfully or not), notify
* anybody using a sub-context that is waiting for
* this completion.
*/
clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
wake_up(&uctxt->wait);
}
if (ret)
deallocate_ctxt(uctxt);
break;
case 1:
ret = complete_subctxt(fd);
......@@ -1305,25 +1298,25 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
/* Now allocate the RcvHdr queue and eager buffers. */
ret = hfi1_create_rcvhdrq(dd, uctxt);
if (ret)
return ret;
goto done;
ret = hfi1_setup_eagerbufs(uctxt);
if (ret)
goto setup_failed;
goto done;
/* If sub-contexts are enabled, do the appropriate setup */
if (uctxt->subctxt_cnt)
ret = setup_subctxt(uctxt);
if (ret)
goto setup_failed;
goto done;
ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
if (ret)
goto setup_failed;
goto done;
ret = init_user_ctxt(fd, uctxt);
if (ret)
goto setup_failed;
goto done;
user_init(uctxt);
......@@ -1331,12 +1324,22 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
fd->uctxt = uctxt;
hfi1_rcd_get(uctxt);
return 0;
done:
if (uctxt->subctxt_cnt) {
/*
* On error, set the failed bit so sub-contexts will clean up
* correctly.
*/
if (ret)
set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
setup_failed:
/* Set the failed bit so sub-context init can do the right thing */
set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
deallocate_ctxt(uctxt);
/*
* Base context is done (successfully or not), notify anybody
* using a sub-context that is waiting for this completion.
*/
clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
wake_up(&uctxt->wait);
}
return ret;
}
......
......@@ -68,7 +68,7 @@
/*
* Code to adjust PCIe capabilities.
*/
static int tune_pcie_caps(struct hfi1_devdata *);
static void tune_pcie_caps(struct hfi1_devdata *);
/*
* Do all the common PCIe setup and initialization.
......@@ -351,7 +351,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
*/
int request_msix(struct hfi1_devdata *dd, u32 msireq)
{
int nvec, ret;
int nvec;
nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
......@@ -360,12 +360,7 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq)
return nvec;
}
ret = tune_pcie_caps(dd);
if (ret) {
dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret);
pci_free_irq_vectors(dd->pcidev);
return ret;
}
tune_pcie_caps(dd);
/* check for legacy IRQ */
if (nvec == 1 && !dd->pcidev->msix_enabled)
......@@ -502,7 +497,7 @@ uint aspm_mode = ASPM_MODE_DISABLED;
module_param_named(aspm, aspm_mode, uint, S_IRUGO);
MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
static int tune_pcie_caps(struct hfi1_devdata *dd)
static void tune_pcie_caps(struct hfi1_devdata *dd)
{
struct pci_dev *parent;
u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
......@@ -513,22 +508,14 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
* Turn on extended tags in DevCtl in case the BIOS has turned it off
* to improve WFR SDMA bandwidth
*/
ret = pcie_capability_read_word(dd->pcidev,
PCI_EXP_DEVCTL, &ectl);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
return ret;
}
if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
if ((!ret) && !(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
dd_dev_info(dd, "Enabling PCIe extended tags\n");
ectl |= PCI_EXP_DEVCTL_EXT_TAG;
ret = pcie_capability_write_word(dd->pcidev,
PCI_EXP_DEVCTL, ectl);
if (ret) {
dd_dev_err(dd, "Unable to write to PCI config\n");
return ret;
}
if (ret)
dd_dev_info(dd, "Unable to write to PCI config\n");
}
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
......@@ -536,15 +523,22 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
* The driver cannot perform the tuning if it does not have
* access to the upstream component.
*/
if (!parent)
return -EINVAL;
if (!parent) {
dd_dev_info(dd, "Parent not found\n");
return;
}
if (!pci_is_root_bus(parent->bus)) {
dd_dev_info(dd, "Parent not root\n");
return -EINVAL;
return;
}
if (!pci_is_pcie(parent)) {
dd_dev_info(dd, "Parent is not PCI Express capable\n");
return;
}
if (!pci_is_pcie(dd->pcidev)) {
dd_dev_info(dd, "PCI device is not PCI Express capable\n");
return;
}
if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
return -EINVAL;
rc_mpss = parent->pcie_mpss;
rc_mps = ffs(pcie_get_mps(parent)) - 8;
/* Find out supported and configured values for endpoint (us) */
......@@ -590,8 +584,6 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
ep_mrrs = max_mrrs;
pcie_set_readrq(dd->pcidev, ep_mrrs);
}
return 0;
}
/* End of PCIe capability tuning */
......
......@@ -790,7 +790,9 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
* reuse of stale settings established in our previous pass through.
*/
if (ppd->qsfp_info.reset_needed) {
reset_qsfp(ppd);
ret = reset_qsfp(ppd);
if (ret)
return ret;
refresh_qsfp_cache(ppd, &ppd->qsfp_info);
} else {
ppd->qsfp_info.reset_needed = 1;
......
......@@ -778,13 +778,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (MLX5_CAP_GEN(mdev, tag_matching)) {
props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
props->xrq_caps.max_num_tags =
props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
props->tm_caps.max_num_tags =
(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
props->xrq_caps.flags = IB_TM_CAP_RC;
props->xrq_caps.max_ops =
props->tm_caps.flags = IB_TM_CAP_RC;
props->tm_caps.max_ops =
1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
props->xrq_caps.max_sge = MLX5_TM_MAX_SGE;
props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
}
if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
......
......@@ -50,13 +50,9 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
{
unsigned long tmp;
unsigned long m;
int i, k;
u64 base = 0;
int p = 0;
int skip;
int mask;
u64 len;
u64 pfn;
u64 base = ~0, p = 0;
u64 len, pfn;
int i = 0;
struct scatterlist *sg;
int entry;
unsigned long page_shift = umem->page_shift;
......@@ -76,33 +72,24 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
m = find_first_bit(&tmp, BITS_PER_LONG);
if (max_page_shift)
m = min_t(unsigned long, max_page_shift - page_shift, m);
skip = 1 << m;
mask = skip - 1;
i = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> page_shift;
pfn = sg_dma_address(sg) >> page_shift;
for (k = 0; k < len; k++) {
if (!(i & mask)) {
tmp = (unsigned long)pfn;
m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG));
skip = 1 << m;
mask = skip - 1;
base = pfn;
p = 0;
} else {
if (base + p != pfn) {
tmp = (unsigned long)p;
m = find_first_bit(&tmp, BITS_PER_LONG);
skip = 1 << m;
mask = skip - 1;
base = pfn;
p = 0;
}
}
p++;
i++;
if (base + p != pfn) {
/* If either the offset or the new
* base are unaligned update m
*/
tmp = (unsigned long)(pfn | p);
if (!IS_ALIGNED(tmp, 1 << m))
m = find_first_bit(&tmp, BITS_PER_LONG);
base = pfn;
p = 0;
}
p += len;
i += len;
}
if (i) {
......
......@@ -47,7 +47,8 @@ enum {
#define MLX5_UMR_ALIGN 2048
static int clean_mr(struct mlx5_ib_mr *mr);
static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
......@@ -1270,8 +1271,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
update_xlt_flags);
if (err) {
mlx5_ib_dereg_mr(&mr->ibmr);
dereg_mr(dev, mr);
return ERR_PTR(err);
}
}
......@@ -1356,7 +1358,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
&npages, &page_shift, &ncont, &order);
if (err < 0) {
clean_mr(mr);
clean_mr(dev, mr);
return err;
}
}
......@@ -1410,7 +1412,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
if (err) {
mlx5_ib_warn(dev, "Failed to rereg UMR\n");
ib_umem_release(mr->umem);
clean_mr(mr);
clean_mr(dev, mr);
return err;
}
}
......@@ -1469,9 +1471,8 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
}
}
static int clean_mr(struct mlx5_ib_mr *mr)
static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
int allocated_from_cache = mr->allocated_from_cache;
int err;
......@@ -1507,10 +1508,8 @@ static int clean_mr(struct mlx5_ib_mr *mr)
return 0;
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
int npages = mr->npages;
struct ib_umem *umem = mr->umem;
......@@ -1539,7 +1538,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
}
#endif
clean_mr(mr);
clean_mr(dev, mr);
if (umem) {
ib_umem_release(umem);
......@@ -1549,6 +1548,14 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
return 0;
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
return dereg_mr(dev, mr);
}
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg)
......
......@@ -3232,7 +3232,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
mr->ibmr.iova);
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
mr->ibmr.length);
lower_32_bits(mr->ibmr.length));
set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
set_wqe_32bit_value(wqe->wqe_words,
......@@ -3274,7 +3274,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
mr->npages * 8);
nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
"length: %d, rkey: %0x, pgl_paddr: %llx, "
"length: %lld, rkey: %0x, pgl_paddr: %llx, "
"page_list_len: %u, wqe_misc: %x\n",
(unsigned long long) mr->ibmr.iova,
mr->ibmr.length,
......
......@@ -1000,19 +1000,6 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
*/
priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff;
/*
* Update the broadcast address in the priv->broadcast object,
* in case it already exists, otherwise no one will do that.
*/
if (priv->broadcast) {
spin_lock_irq(&priv->lock);
memcpy(priv->broadcast->mcmember.mgid.raw,
priv->dev->broadcast + 4,
sizeof(union ib_gid));
spin_unlock_irq(&priv->lock);
}
return 0;
}
......
......@@ -2180,6 +2180,7 @@ static struct net_device *ipoib_add_port(const char *format,
{
struct ipoib_dev_priv *priv;
struct ib_port_attr attr;
struct rdma_netdev *rn;
int result = -ENOMEM;
priv = ipoib_intf_alloc(hca, port, format);
......@@ -2279,7 +2280,8 @@ static struct net_device *ipoib_add_port(const char *format,
ipoib_dev_cleanup(priv->dev);
device_init_failed:
free_netdev(priv->dev);
rn = netdev_priv(priv->dev);
rn->free_rdma_netdev(priv->dev);
kfree(priv);
alloc_mem_failed:
......@@ -2328,7 +2330,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
return;
list_for_each_entry_safe(priv, tmp, dev_list, list) {
struct rdma_netdev *rn = netdev_priv(priv->dev);
struct rdma_netdev *parent_rn = netdev_priv(priv->dev);
ib_unregister_event_handler(&priv->event_handler);
flush_workqueue(ipoib_workqueue);
......@@ -2350,10 +2352,15 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
unregister_netdev(priv->dev);
mutex_unlock(&priv->sysfs_mutex);
rn->free_rdma_netdev(priv->dev);
parent_rn->free_rdma_netdev(priv->dev);
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
struct rdma_netdev *child_rn;
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
child_rn = netdev_priv(cpriv->dev);
child_rn->free_rdma_netdev(cpriv->dev);
kfree(cpriv);
}
kfree(priv);
}
......
......@@ -141,14 +141,17 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
return restart_syscall();
}
priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (!priv) {
if (!down_write_trylock(&ppriv->vlan_rwsem)) {
rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
return -ENOMEM;
return restart_syscall();
}
down_write(&ppriv->vlan_rwsem);
priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (!priv) {
result = -ENOMEM;
goto out;
}
/*
* First ensure this isn't a duplicate. We check the parent device and
......@@ -175,8 +178,11 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
if (result) {
free_netdev(priv->dev);
if (result && priv) {
struct rdma_netdev *rn;
rn = netdev_priv(priv->dev);
rn->free_rdma_netdev(priv->dev);
kfree(priv);
}
......@@ -204,7 +210,12 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
return restart_syscall();
}
down_write(&ppriv->vlan_rwsem);
if (!down_write_trylock(&ppriv->vlan_rwsem)) {
rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
return restart_syscall();
}
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
if (priv->pkey == pkey &&
priv->child_type == IPOIB_LEGACY_CHILD) {
......@@ -224,7 +235,10 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
mutex_unlock(&ppriv->sysfs_mutex);
if (dev) {
free_netdev(dev);
struct rdma_netdev *rn;
rn = netdev_priv(dev);
rn->free_rdma_netdev(priv->dev);
kfree(priv);
return 0;
}
......
......@@ -154,7 +154,7 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
int i;
iser_err("page vec npages %d data length %d\n",
iser_err("page vec npages %d data length %lld\n",
page_vec->npages, page_vec->fake_mr.length);
for (i = 0; i < page_vec->npages; i++)
iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
......
......@@ -285,7 +285,7 @@ enum ib_tm_cap_flags {
IB_TM_CAP_RC = 1 << 0,
};
struct ib_xrq_caps {
struct ib_tm_caps {
/* Max size of RNDV header */
u32 max_rndv_hdr_size;
/* Max number of entries in tag matching list */
......@@ -358,7 +358,7 @@ struct ib_device_attr {
struct ib_rss_caps rss_caps;
u32 max_wq_type_rq;
u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */
struct ib_xrq_caps xrq_caps;
struct ib_tm_caps tm_caps;
};
enum ib_mtu {
......@@ -1739,7 +1739,7 @@ struct ib_mr {
u32 lkey;
u32 rkey;
u64 iova;
u32 length;
u64 length;
unsigned int page_size;
bool need_inval;
union {
......
......@@ -261,7 +261,7 @@ struct ib_uverbs_ex_query_device_resp {
struct ib_uverbs_rss_caps rss_caps;
__u32 max_wq_type_rq;
__u32 raw_packet_caps;
struct ib_uverbs_tm_caps xrq_caps;
struct ib_uverbs_tm_caps tm_caps;
};
struct ib_uverbs_query_port {
......
......@@ -401,7 +401,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
if (unlikely(n != mw->mw_nents))
goto out_mapmr_err;
dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
__func__, frmr, mw->mw_nents, mr->length);
key = (u8)(mr->rkey & 0x000000FF);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册