/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
41
#include <linux/slab.h>
42
#include <linux/io-mapping.h>
43
#include <linux/delay.h>
44
#include <linux/kmod.h>
45 46 47 48 49 50 51 52 53 54 55 56 57

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

/* Driver workqueue pointer; it is not initialised in this part of the file. */
struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

/* Debug tracing is enabled when > 0; mode 0644 makes it writable via sysfs. */
int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

/* Non-zero asks the driver to attempt MSI-X; read-only after load (0444). */
static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

/* Without CONFIG_PCI_MSI the option collapses to a constant zero. */
#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

80
static uint8_t num_vfs[3] = {0, 0, 0};
81
static int num_vfs_argc;
82 83 84 85 86
module_param_array(num_vfs, byte , &num_vfs_argc, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
			  "num_vfs=port1,port2,port1+2");

static uint8_t probe_vf[3] = {0, 0, 0};
87
static int probe_vfs_argc;
88 89 90
module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
			   "probe_vf=port1,port2,port1+2");
91

92
int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
93 94 95 96
module_param_named(log_num_mgm_entry_size,
			mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
					 " of qp per mcg, for example:"
97
					 " 10 gives 248.range: 7 <="
98
					 " log_num_mgm_entry_size <= 12."
99 100
					 " To activate device managed"
					 " flow steering when available, set to -1");
101

102
static bool enable_64b_cqe_eqe = true;
O
Or Gerlitz 已提交
103 104
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
105
		 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");
O
Or Gerlitz 已提交
106

107
#define PF_CONTEXT_BEHAVIOUR_MASK	(MLX4_FUNC_CAP_64B_EQE_CQE | \
108 109
					 MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
					 MLX4_FUNC_CAP_DMFS_A0_STATIC)
110

111 112
#define RESET_PERSIST_MASK_FLAGS	(MLX4_FLAG_SRIOV)

113
static char mlx4_version[] =
114 115 116 117
	DRV_NAME ": Mellanox ConnectX core driver v"
	DRV_VERSION " (" DRV_RELDATE ")\n";

static struct mlx4_profile default_profile = {
118
	.num_qp		= 1 << 18,
119
	.num_srq	= 1 << 16,
120
	.rdmarc_per_qp	= 1 << 4,
121 122
	.num_cq		= 1 << 16,
	.num_mcg	= 1 << 13,
123
	.num_mpt	= 1 << 19,
124
	.num_mtt	= 1 << 20, /* It is really num mtt segements */
125 126
};

127 128 129 130 131 132 133 134 135 136
/*
 * Reduced resource counts compared with default_profile.
 * NOTE(review): presumably selected when mlx4_low_memory_profile() is true;
 * the selection code is not in this part of the file — confirm.
 */
static struct mlx4_profile low_mem_profile = {
	.num_qp		= 1 << 17,
	.num_srq	= 1 << 6,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 8,
	.num_mcg	= 1 << 8,
	.num_mpt	= 1 << 9,
	.num_mtt	= 1 << 7,
};

137
static int log_num_mac = 7;
138 139 140 141 142 143
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
144 145
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7
146 147
#define MLX4_MIN_LOG_NUM_VLANS 0
#define MLX4_MIN_LOG_NUM_MAC 1
148

149
static bool use_prio;
150
module_param_named(use_prio, use_prio, bool, 0444);
151
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");
152

153
int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
154
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
155
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
156

157
static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
158 159
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
160 161
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default "
				"1 for IB, 2 for Ethernet");
162 163 164 165 166 167 168

/*
 * A port-type configuration tied to one PCI device, linkable into a list.
 * NOTE(review): port_type has MLX4_MAX_PORTS + 1 slots, presumably so it can
 * be indexed by 1-based port number — confirm at the users.
 */
struct mlx4_port_config {
	struct list_head list;
	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
	struct pci_dev *pdev;
};

169 170
/*
 * Atomic counter starting at 0; its users are not in this part of the file.
 * NOTE(review): presumably counts PFs that are currently loading — confirm.
 */
static atomic_t pf_loading = ATOMIC_INIT(0);

171 172
int mlx4_check_port_params(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_type)
173 174 175
{
	int i;

176 177 178
	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
		for (i = 0; i < dev->caps.num_ports - 1; i++) {
			if (port_type[i] != port_type[i + 1]) {
J
Joe Perches 已提交
179
				mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
180 181
				return -EINVAL;
			}
182 183 184 185 186
		}
	}

	for (i = 0; i < dev->caps.num_ports; i++) {
		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
J
Joe Perches 已提交
187 188
			mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
				 i + 1);
189 190 191 192 193 194 195 196 197 198 199
			return -EINVAL;
		}
	}
	return 0;
}

/* Copy the current type of every port (1-based) into caps.port_mask[]. */
static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
	int i;

	for (i = 1; i <= dev->caps.num_ports; ++i)
		dev->caps.port_mask[i] = dev->caps.port_type[i];
}
202

203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
/*
 * Flag bit returned by mlx4_query_func() once it has replaced the EQ and UAR
 * values in the device capabilities with those reported by QUERY_FUNC.
 */
enum {
	MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
};

/**
 * mlx4_query_func - override EQ/UAR capabilities from QUERY_FUNC
 * @dev: device to query
 * @dev_cap: capabilities to update
 *
 * Does nothing unless the device has MLX4_DEV_CAP_FLAG2_SYS_EQS. With that
 * flag set, QUERY_FUNC is issued for function 0 and its max_eq, rsvd_eqs and
 * rsvd_uars replace the matching fields of @dev_cap.
 *
 * Return: the error from mlx4_QUERY_FUNC() on failure,
 * MLX4_QUERY_FUNC_NUM_SYS_EQS when @dev_cap was updated, 0 when the flag is
 * not set.
 */
static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	struct mlx4_func func;
	int err;

	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS))
		return 0;

	err = mlx4_QUERY_FUNC(dev, &func, 0);
	if (err) {
		/* The failing command is QUERY_FUNC, not QUERY_DEV_CAP. */
		mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
		return err;
	}

	dev_cap->max_eqs = func.max_eq;
	dev_cap->reserved_eqs = func.rsvd_eqs;
	dev_cap->reserved_uars = func.rsvd_uars;

	return MLX4_QUERY_FUNC_NUM_SYS_EQS;
}

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
/**
 * mlx4_enable_cqe_eqe_stride - decide whether CQE/EQE stride is used
 * @dev: device whose capability flags are adjusted in place
 *
 * Stride is kept only when both stride flags and both 64B CQE/EQE flags are
 * set and the cache line size is 128 or 256 bytes. In that case the 64B
 * flags are cleared and, on a master, MLX4_FUNC_CAP_EQE_CQE_STRIDE is added
 * to function_caps. Otherwise the stride flags are cleared, except when they
 * were not both set to begin with, where the flags are left untouched.
 */
static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
{
	struct mlx4_caps *dev_cap = &dev->caps;

	/* FW not supporting or cancelled by user */
	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
	    !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
		return;

	/* Must have 64B CQE_EQE enabled by FW to use bigger stride
	 * When FW has NCSI it may decide not to report 64B CQE/EQEs
	 */
	if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
	    !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		return;
	}

	if (cache_line_size() == 128 || cache_line_size() == 256) {
		mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n");
		/* Changing the real data inside CQE size to 32B */
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;

		if (mlx4_is_master(dev))
			dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
	} else {
		/* 32 and 64 byte lines disable stride silently */
		if (cache_line_size() != 32  && cache_line_size() != 64)
			mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n");
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
	}
}

M
Matan Barak 已提交
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
			  struct mlx4_port_cap *port_cap)
{
	dev->caps.vl_cap[port]	    = port_cap->max_vl;
	dev->caps.ib_mtu_cap[port]	    = port_cap->ib_mtu;
	dev->phys_caps.gid_phys_table_len[port]  = port_cap->max_gids;
	dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
	/* set gid and pkey table operating lengths by default
	 * to non-sriov values
	 */
	dev->caps.gid_table_len[port]  = port_cap->max_gids;
	dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
	dev->caps.port_width_cap[port] = port_cap->max_port_width;
	dev->caps.eth_mtu_cap[port]    = port_cap->eth_mtu;
	dev->caps.def_mac[port]        = port_cap->def_mac;
	dev->caps.supported_type[port] = port_cap->supported_port_types;
	dev->caps.suggested_type[port] = port_cap->suggested_type;
	dev->caps.default_sense[port] = port_cap->default_sense;
	dev->caps.trans_type[port]	    = port_cap->trans_type;
	dev->caps.vendor_oui[port]     = port_cap->vendor_oui;
	dev->caps.wavelength[port]     = port_cap->wavelength;
	dev->caps.trans_code[port]     = port_cap->trans_code;

	return 0;
}

/*
 * Issue QUERY_PORT for @port, storing the result in @port_cap.
 * Logs an error and returns the command's error code on failure, else 0.
 */
static int mlx4_dev_port(struct mlx4_dev *dev, int port,
			 struct mlx4_port_cap *port_cap)
{
	int ret = mlx4_QUERY_PORT(dev, port, port_cap);

	if (!ret)
		return 0;

	mlx4_err(dev, "QUERY_PORT command failed.\n");
	return ret;
}

/* Fallback dmfs_high_rate_qpn_range used by mlx4_dev_cap() when the firmware value is not used. */
#define MLX4_A0_STEERING_TABLE_SIZE	256
R
Roland Dreier 已提交
301
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
302 303
{
	int err;
304
	int i;
305 306 307

	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
	if (err) {
J
Joe Perches 已提交
308
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
309 310
		return err;
	}
311
	mlx4_dev_cap_dump(dev, dev_cap);
312 313

	if (dev_cap->min_page_sz > PAGE_SIZE) {
J
Joe Perches 已提交
314
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
315 316 317 318
			 dev_cap->min_page_sz, PAGE_SIZE);
		return -ENODEV;
	}
	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
J
Joe Perches 已提交
319
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
320 321 322 323
			 dev_cap->num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

324
	if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
J
Joe Perches 已提交
325
		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
326
			 dev_cap->uar_size,
327 328
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
329 330 331 332
		return -ENODEV;
	}

	dev->caps.num_ports	     = dev_cap->num_ports;
333 334 335 336
	dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
	dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
				      dev->caps.num_sys_eqs :
				      MLX4_MAX_EQ_NUM;
337
	for (i = 1; i <= dev->caps.num_ports; ++i) {
M
Matan Barak 已提交
338 339 340 341 342
		err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
		if (err) {
			mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
			return err;
		}
343 344
	}

345
	dev->caps.uar_page_size	     = PAGE_SIZE;
346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
	dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
	dev->caps.max_sq_sg	     = dev_cap->max_sq_sg;
	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
	dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
	dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
	/*
	 * Subtract 1 from the limit because we need to allocate a
	 * spare CQE so the HCA HW can tell the difference between an
	 * empty CQ and a full CQ.
	 */
	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
367
	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
368
	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
369 370 371

	/* The first 128 UARs are used for EQ doorbells */
	dev->caps.reserved_uars	     = max_t(int, 128, dev_cap->reserved_uars);
372
	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
S
Sean Hefty 已提交
373 374 375 376
	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->reserved_xrcds : 0;
	dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->max_xrcds : 0;
377 378
	dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;

379
	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
380 381
	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
	dev->caps.flags		     = dev_cap->flags;
382
	dev->caps.flags2	     = dev_cap->flags2;
383 384
	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
385
	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
E
Eli Cohen 已提交
386
	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
387
	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
388

389 390
	/* Sense port always allowed on supported devices for ConnectX-1 and -2 */
	if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
391
		dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
392 393 394
	/* Don't do sense port on multifunction devices (for now at least) */
	if (mlx4_is_mfunc(dev))
		dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
395

396 397 398 399 400 401 402
	if (mlx4_low_memory_profile()) {
		dev->caps.log_num_macs  = MLX4_MIN_LOG_NUM_MAC;
		dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
	} else {
		dev->caps.log_num_macs  = log_num_mac;
		dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
	}
403 404

	for (i = 1; i <= dev->caps.num_ports; ++i) {
405 406 407 408 409
		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
		if (dev->caps.supported_type[i]) {
			/* if only ETH is supported - assign ETH */
			if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
410
			/* if only IB is supported, assign IB */
411
			else if (dev->caps.supported_type[i] ==
412 413
				 MLX4_PORT_TYPE_IB)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
414
			else {
415 416 417 418
				/* if IB and ETH are supported, we set the port
				 * type according to user selection of port type;
				 * if user selected none, take the FW hint */
				if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
419 420
					dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
						MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
421
				else
422
					dev->caps.port_type[i] = port_type_array[i - 1];
423 424
			}
		}
425 426 427 428 429 430
		/*
		 * Link sensing is allowed on the port if 3 conditions are true:
		 * 1. Both protocols are supported on the port.
		 * 2. Different types are supported on the port
		 * 3. FW declared that it supports link sensing
		 */
431
		mlx4_priv(dev)->sense.sense_allowed[i] =
432
			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
433
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
434
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
435

436 437 438 439 440
		/*
		 * If "default_sense" bit is set, we move the port to "AUTO" mode
		 * and perform sense_port FW command to try and set the correct
		 * port type from beginning
		 */
441
		if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
442 443 444 445 446 447 448 449 450
			enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
			dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
			mlx4_SENSE_PORT(dev, i, &sensed_port);
			if (sensed_port != MLX4_PORT_TYPE_NONE)
				dev->caps.port_type[i] = sensed_port;
		} else {
			dev->caps.possible_type[i] = dev->caps.port_type[i];
		}

M
Matan Barak 已提交
451 452
		if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
			dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
J
Joe Perches 已提交
453
			mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n",
454 455
				  i, 1 << dev->caps.log_num_macs);
		}
M
Matan Barak 已提交
456 457
		if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
			dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
J
Joe Perches 已提交
458
			mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n",
459 460 461 462
				  i, 1 << dev->caps.log_num_vlans);
		}
	}

463 464
	dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters);

465 466 467 468 469 470 471
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
		(1 << dev->caps.log_num_macs) *
		(1 << dev->caps.log_num_vlans) *
		dev->caps.num_ports;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491

	if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
		dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
	else
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];

	if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
		dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
		dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
	} else {
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
		dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
	}

492 493
	dev->caps.rl_caps = dev_cap->rl_caps;

M
Matan Barak 已提交
494
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
495
		dev->caps.dmfs_high_rate_qpn_range;
496 497 498 499 500 501

	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

502
	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
O
Or Gerlitz 已提交
503

504
	if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
O
Or Gerlitz 已提交
505 506 507 508 509 510
		if (dev_cap->flags &
		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
		}
511 512 513 514 515 516 517 518

		if (dev_cap->flags2 &
		    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
		     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
			mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		}
O
Or Gerlitz 已提交
519 520
	}

521
	if ((dev->caps.flags &
O
Or Gerlitz 已提交
522 523 524 525
	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
	    mlx4_is_master(dev))
		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

526
	if (!mlx4_is_slave(dev)) {
527
		mlx4_enable_cqe_eqe_stride(dev);
528
		dev->caps.alloc_res_qp_mask =
M
Matan Barak 已提交
529 530
			(dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
			MLX4_RESERVE_A0_QP;
531 532 533
	} else {
		dev->caps.alloc_res_qp_mask = 0;
	}
534

535 536
	return 0;
}
537 538 539 540 541 542 543 544 545 546 547 548 549

static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
				       enum pci_bus_speed *speed,
				       enum pcie_link_width *width)
{
	u32 lnkcap1, lnkcap2;
	int err1, err2;

#define  PCIE_MLW_CAP_SHIFT 4	/* start of MLW mask in link capabilities */

	*speed = PCI_SPEED_UNKNOWN;
	*width = PCIE_LNK_WIDTH_UNKNOWN;

550 551 552 553
	err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP,
					  &lnkcap1);
	err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2,
					  &lnkcap2);
554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597
	if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
		if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
			*speed = PCIE_SPEED_8_0GT;
		else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
			*speed = PCIE_SPEED_5_0GT;
		else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
			*speed = PCIE_SPEED_2_5GT;
	}
	if (!err1) {
		*width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT;
		if (!lnkcap2) { /* pre-r3.0 */
			if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB)
				*speed = PCIE_SPEED_5_0GT;
			else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB)
				*speed = PCIE_SPEED_2_5GT;
		}
	}

	if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) {
		return err1 ? err1 :
			err2 ? err2 : -EINVAL;
	}
	return 0;
}

static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
{
	enum pcie_link_width width, width_cap;
	enum pci_bus_speed speed, speed_cap;
	int err;

#define PCIE_SPEED_STR(speed) \
	(speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \
	 speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \
	 speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \
	 "Unknown")

	err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap);
	if (err) {
		mlx4_warn(dev,
			  "Unable to determine PCIe device BW capabilities\n");
		return;
	}

598
	err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width);
599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616
	if (err || speed == PCI_SPEED_UNKNOWN ||
	    width == PCIE_LNK_WIDTH_UNKNOWN) {
		mlx4_warn(dev,
			  "Unable to determine PCI device chain minimum BW\n");
		return;
	}

	if (width != width_cap || speed != speed_cap)
		mlx4_warn(dev,
			  "PCIe BW is different than device's capability\n");

	mlx4_info(dev, "PCIe link speed is %s, device supports %s\n",
		  PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap));
	mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n",
		  width, width_cap);
	return;
}

617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
/*
 * Count the slaves that are still alive: marked active and whose last
 * command was not MLX4_COMM_CMD_RESET. Each one found is also logged.
 */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int live = 0;
	int slave;

	/* slave 0 is the ppf, so the scan starts at 1 */
	for (slave = 1; slave < dev->num_slaves; ++slave) {
		struct mlx4_slave_state *state =
			&priv->mfunc.master.slave_state[slave];

		if (!state->active)
			continue;
		if (state->last_cmd == MLX4_COMM_CMD_RESET)
			continue;

		mlx4_warn(dev, "%s: slave: %d is still active\n",
			  __func__, slave);
		live++;
	}

	return live;
}

637 638 639
/**
 * mlx4_get_parav_qkey - compute the qkey of a proxy or tunnel special QP
 * @dev: device owning the QP number ranges
 * @qpn: QP number to translate
 * @qkey: receives the qkey; left untouched on error
 *
 * Accepts @qpn from base_proxy_sqpn up to, but not including,
 * base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX. The qkey is
 * MLX4_RESERVED_QKEY_BASE plus the offset of @qpn from the start of the
 * range it falls in (tunnel range if @qpn >= base_tunnel_sqpn, proxy range
 * otherwise).
 *
 * Return: 0 on success, -EINVAL if @qpn is outside the accepted span.
 */
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
	u32 qk = MLX4_RESERVED_QKEY_BASE;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
	    qpn < dev->phys_caps.base_proxy_sqpn)
		return -EINVAL;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn)
		/* tunnel qp */
		qk += qpn - dev->phys_caps.base_tunnel_sqpn;
	else
		qk += qpn - dev->phys_caps.base_proxy_sqpn;
	*qkey = qk;
	return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

655 656 657 658 659 660 661 662 663 664 665
/*
 * Record @val in the virtual-to-physical pkey table at entry @i for @slave
 * on @port (1-based). Does nothing unless @dev is a master.
 */
void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
	struct mlx4_priv *priv;

	if (mlx4_is_master(dev)) {
		priv = container_of(dev, struct mlx4_priv, dev);
		priv->virt2phys_pkey[slave][port - 1][i] = val;
	}
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);

666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687
/* Store @guid as the node GUID of @slave; ignored unless @dev is a master. */
void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
	struct mlx4_priv *priv;

	if (mlx4_is_master(dev)) {
		priv = container_of(dev, struct mlx4_priv, dev);
		priv->slave_node_guids[slave] = guid;
	}
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);

/* Return the stored node GUID of @slave, or 0 when @dev is not a master. */
__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv;

	if (mlx4_is_master(dev)) {
		priv = container_of(dev, struct mlx4_priv, dev);
		return priv->slave_node_guids[slave];
	}

	return 0;
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);

688
int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
689 690 691 692 693 694 695 696 697 698 699 700
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_slave;

	if (!mlx4_is_master(dev))
		return 0;

	s_slave = &priv->mfunc.master.slave_state[slave];
	return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717
/*
 * Adopt the steering mode reported in @hca_param and derive the number of
 * QPs per MGM entry from it. In device-managed mode the limits come from
 * @dev_cap; in any other mode the count is computed from the log2
 * multicast entry size.
 */
static void slave_adjust_steering_mode(struct mlx4_dev *dev,
				       struct mlx4_dev_cap *dev_cap,
				       struct mlx4_init_hca_param *hca_param)
{
	dev->caps.steering_mode = hca_param->steering_mode;

	if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
		dev->caps.num_qp_per_mgm =
			4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
	} else {
		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
		dev->caps.fs_log_max_ucast_qp_range_size =
			dev_cap->fs_log_max_ucast_qp_range_size;
	}

	mlx4_dbg(dev, "Steering mode is: %s\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode));
}

718 719 720 721 722 723 724
static int mlx4_slave_cap(struct mlx4_dev *dev)
{
	int			   err;
	u32			   page_size;
	struct mlx4_dev_cap	   dev_cap;
	struct mlx4_func_cap	   func_cap;
	struct mlx4_init_hca_param hca_param;
725
	u8			   i;
726 727 728 729

	memset(&hca_param, 0, sizeof(hca_param));
	err = mlx4_QUERY_HCA(dev, &hca_param);
	if (err) {
J
Joe Perches 已提交
730
		mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
731 732 733
		return err;
	}

734 735 736 737
	/* fail if the hca has an unknown global capability
	 * at this time global_caps should be always zeroed
	 */
	if (hca_param.global_caps) {
738 739 740 741 742 743
		mlx4_err(dev, "Unknown hca global capabilities\n");
		return -ENOSYS;
	}

	mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;

744 745
	dev->caps.hca_core_clock = hca_param.hca_core_clock;

746
	memset(&dev_cap, 0, sizeof(dev_cap));
747
	dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
748 749
	err = mlx4_dev_cap(dev, &dev_cap);
	if (err) {
J
Joe Perches 已提交
750
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
751 752 753
		return err;
	}

754 755
	err = mlx4_QUERY_FW(dev);
	if (err)
J
Joe Perches 已提交
756
		mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");
757

758 759 760
	page_size = ~dev->caps.page_size_cap + 1;
	mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
	if (page_size > PAGE_SIZE) {
J
Joe Perches 已提交
761
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
762 763 764 765 766 767 768 769 770 771 772 773 774 775 776
			 page_size, PAGE_SIZE);
		return -ENODEV;
	}

	/* slave gets uar page size from QUERY_HCA fw command */
	dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);

	/* TODO: relax this assumption */
	if (dev->caps.uar_page_size != PAGE_SIZE) {
		mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
			 dev->caps.uar_page_size, PAGE_SIZE);
		return -ENODEV;
	}

	memset(&func_cap, 0, sizeof(func_cap));
777
	err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
778
	if (err) {
J
Joe Perches 已提交
779 780
		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
			 err);
781 782 783 784 785
		return err;
	}

	if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
	    PF_CONTEXT_BEHAVIOUR_MASK) {
786 787
		mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
			 func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK);
788 789 790 791
		return -ENOSYS;
	}

	dev->caps.num_ports		= func_cap.num_ports;
792 793 794 795 796 797 798 799 800 801 802
	dev->quotas.qp			= func_cap.qp_quota;
	dev->quotas.srq			= func_cap.srq_quota;
	dev->quotas.cq			= func_cap.cq_quota;
	dev->quotas.mpt			= func_cap.mpt_quota;
	dev->quotas.mtt			= func_cap.mtt_quota;
	dev->caps.num_qps		= 1 << hca_param.log_num_qps;
	dev->caps.num_srqs		= 1 << hca_param.log_num_srqs;
	dev->caps.num_cqs		= 1 << hca_param.log_num_cqs;
	dev->caps.num_mpts		= 1 << hca_param.log_mpt_sz;
	dev->caps.num_eqs		= func_cap.max_eq;
	dev->caps.reserved_eqs		= func_cap.reserved_eq;
803
	dev->caps.reserved_lkey		= func_cap.reserved_lkey;
804 805 806 807 808
	dev->caps.num_pds               = MLX4_NUM_PDS;
	dev->caps.num_mgms              = 0;
	dev->caps.num_amgms             = 0;

	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
J
Joe Perches 已提交
809 810
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev->caps.num_ports, MLX4_MAX_PORTS);
811 812 813
		return -ENODEV;
	}

814
	dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
815 816 817 818 819 820
	dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
	dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
	dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
	dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);

	if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
821 822
	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
	    !dev->caps.qp0_qkey) {
823 824 825 826
		err = -ENOMEM;
		goto err_mem;
	}

827
	for (i = 1; i <= dev->caps.num_ports; ++i) {
828
		err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap);
829
		if (err) {
J
Joe Perches 已提交
830 831
			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
				 i, err);
832 833
			goto err_mem;
		}
834
		dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
835 836 837 838
		dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
		dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
		dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
		dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
839
		dev->caps.port_mask[i] = dev->caps.port_type[i];
840
		dev->caps.phys_port_id[i] = func_cap.phys_port_id;
841 842 843
		if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
						    &dev->caps.gid_table_len[i],
						    &dev->caps.pkey_table_len[i]))
844
			goto err_mem;
845
	}
846

847 848
	if (dev->caps.uar_page_size * (dev->caps.num_uars -
				       dev->caps.reserved_uars) >
849 850
				       pci_resource_len(dev->persist->pdev,
							2)) {
J
Joe Perches 已提交
851
		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
852
			 dev->caps.uar_page_size * dev->caps.num_uars,
853 854
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
855
		goto err_mem;
856 857
	}

O
Or Gerlitz 已提交
858 859 860 861 862 863 864 865 866 867
	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
		dev->caps.eqe_size   = 64;
		dev->caps.eqe_factor = 1;
	} else {
		dev->caps.eqe_size   = 32;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
		dev->caps.cqe_size   = 64;
868
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
O
Or Gerlitz 已提交
869 870 871 872
	} else {
		dev->caps.cqe_size   = 32;
	}

873 874 875 876 877 878 879 880 881 882 883
	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
		dev->caps.eqe_size = hca_param.eqe_size;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
		dev->caps.cqe_size = hca_param.cqe_size;
		/* User still need to know when CQE > 32B */
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	}

884
	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
J
Joe Perches 已提交
885
	mlx4_warn(dev, "Timestamping is not supported in slave mode\n");
886

887 888
	slave_adjust_steering_mode(dev, &dev_cap, &hca_param);

889 890 891 892
	if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
	    dev->caps.bf_reg_size)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;

M
Matan Barak 已提交
893 894 895
	if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;

896
	return 0;
897 898

err_mem:
899
	kfree(dev->caps.qp0_qkey);
900 901 902 903
	kfree(dev->caps.qp0_tunnel);
	kfree(dev->caps.qp0_proxy);
	kfree(dev->caps.qp1_tunnel);
	kfree(dev->caps.qp1_proxy);
904 905 906 907 908
	dev->caps.qp0_qkey = NULL;
	dev->caps.qp0_tunnel = NULL;
	dev->caps.qp0_proxy = NULL;
	dev->caps.qp1_tunnel = NULL;
	dev->caps.qp1_proxy = NULL;
909 910

	return err;
911
}
912

913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929
static void mlx4_request_modules(struct mlx4_dev *dev)
{
	int port;
	int has_ib_port = false;
	int has_eth_port = false;
#define EN_DRV_NAME	"mlx4_en"
#define IB_DRV_NAME	"mlx4_ib"

	for (port = 1; port <= dev->caps.num_ports; port++) {
		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
			has_ib_port = true;
		else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
			has_eth_port = true;
	}

	if (has_eth_port)
		request_module_nowait(EN_DRV_NAME);
930 931
	if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
		request_module_nowait(IB_DRV_NAME);
932 933
}

934 935 936 937
/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
938 939
int mlx4_change_port_types(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_types)
940 941 942 943 944 945
{
	int err = 0;
	int change = 0;
	int port;

	for (port = 0; port <  dev->caps.num_ports; port++) {
946 947
		/* Change the port type only if the new type is different
		 * from the current, and not set to Auto */
948
		if (port_types[port] != dev->caps.port_type[port + 1])
949 950 951 952 953 954
			change = 1;
	}
	if (change) {
		mlx4_unregister_device(dev);
		for (port = 1; port <= dev->caps.num_ports; port++) {
			mlx4_CLOSE_PORT(dev, port);
955
			dev->caps.port_type[port] = port_types[port - 1];
956
			err = mlx4_SET_PORT(dev, port, -1);
957
			if (err) {
J
Joe Perches 已提交
958 959
				mlx4_err(dev, "Failed to set port %d, aborting\n",
					 port);
960 961 962 963 964
				goto out;
			}
		}
		mlx4_set_port_mask(dev);
		err = mlx4_register_device(dev);
965 966 967 968 969
		if (err) {
			mlx4_err(dev, "Failed to register device\n");
			goto out;
		}
		mlx4_request_modules(dev);
970 971 972 973 974 975 976 977 978 979 980 981 982
	}

out:
	return err;
}

/*
 * sysfs show handler for the port type attribute.
 *
 * Writes "ib" or "eth" according to the current port type; when the
 * possible type of the port is MLX4_PORT_TYPE_AUTO the current type is
 * reported as "auto (<type>)".  Returns the number of characters written
 * to buf.
 */
static ssize_t show_port_type(struct device *dev,
			      struct device_attribute *attr,
			      char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	const char *cur;

	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB)
		cur = "ib";
	else
		cur = "eth";

	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
		return sprintf(buf, "auto (%s)\n", cur);

	return sprintf(buf, "%s\n", cur);
}

/*
 * sysfs store handler for the port type attribute.
 *
 * Accepts exactly "ib\n", "eth\n" or "auto\n".  The requested type is
 * recorded as the port's temporary and possible type, the resulting
 * per-port configuration is sensed and checked, and only then applied
 * through mlx4_change_port_types().
 *
 * Returns count on success or a negative errno.
 */
static ssize_t set_port_type(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	enum mlx4_port_type types[MLX4_MAX_PORTS];
	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
	static DEFINE_MUTEX(set_port_type_mutex);
	int err = 0;
	int p;

	mutex_lock(&set_port_type_mutex);

	if (!strcmp(buf, "ib\n")) {
		info->tmp_type = MLX4_PORT_TYPE_IB;
	} else if (!strcmp(buf, "eth\n")) {
		info->tmp_type = MLX4_PORT_TYPE_ETH;
	} else if (!strcmp(buf, "auto\n")) {
		info->tmp_type = MLX4_PORT_TYPE_AUTO;
	} else {
		mlx4_err(mdev, "%s is not supported port type\n", buf);
		err = -EINVAL;
		goto unlock_set;
	}

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);

	/* Possible type is always the one that was delivered */
	mdev->caps.possible_type[info->port] = info->tmp_type;

	/*
	 * Build the requested configuration: a pending temporary type wins
	 * over the possible type, and "auto" falls back to the current type.
	 */
	for (p = 1; p <= mdev->caps.num_ports; p++) {
		if (priv->port[p].tmp_type)
			types[p - 1] = priv->port[p].tmp_type;
		else
			types[p - 1] = mdev->caps.possible_type[p];

		if (types[p - 1] == MLX4_PORT_TYPE_AUTO)
			types[p - 1] = mdev->caps.port_type[p];
	}

	/* With neither DPDP nor sense support, "auto" cannot be honoured */
	if (!(mdev->caps.flags & (MLX4_DEV_CAP_FLAG_DPDP |
				  MLX4_DEV_CAP_FLAG_SENSE_SUPPORT))) {
		for (p = 1; p <= mdev->caps.num_ports; p++) {
			if (mdev->caps.possible_type[p] != MLX4_PORT_TYPE_AUTO)
				continue;
			mdev->caps.possible_type[p] = mdev->caps.port_type[p];
			err = -EINVAL;
		}
	}
	if (err) {
		mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
		goto restart_sense;
	}

	mlx4_do_sense_ports(mdev, new_types, types);

	err = mlx4_check_port_params(mdev, new_types);
	if (err)
		goto restart_sense;

	/*
	 * The configuration was verified and is about to be applied, so the
	 * temporary types no longer need to be remembered.
	 */
	for (p = 1; p <= mdev->caps.num_ports; p++)
		priv->port[p].tmp_type = 0;

	err = mlx4_change_port_types(mdev, new_types);

restart_sense:
	mlx4_start_sense(mdev);
	mutex_unlock(&priv->port_mutex);
unlock_set:
	mutex_unlock(&set_port_type_mutex);

	if (err)
		return err;
	return count;
}

1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135
/*
 * MTU codes used for the per-port IB MTU.  Code k stands for an MTU of
 * (128 << k) bytes, i.e. 1 -> 256 up to 5 -> 4096.
 */
enum ibta_mtu {
	IB_MTU_256  = 1,
	IB_MTU_512  = 2,
	IB_MTU_1024 = 3,
	IB_MTU_2048 = 4,
	IB_MTU_4096 = 5
};

/*
 * Convert an MTU given in bytes to its enum ibta_mtu code.
 * Returns -1 when the size is not one of 256/512/1024/2048/4096.
 */
static inline int int_to_ibta_mtu(int mtu)
{
	int code;

	for (code = IB_MTU_256; code <= IB_MTU_4096; code++) {
		if (mtu == (128 << code))
			return code;
	}

	return -1;
}

/*
 * Convert an enum ibta_mtu code to the MTU in bytes.
 * Returns -1 for a value outside IB_MTU_256..IB_MTU_4096.
 */
static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
	if (mtu < IB_MTU_256 || mtu > IB_MTU_4096)
		return -1;

	return 128 << mtu;
}

/*
 * sysfs show handler for the per-port IB MTU attribute.
 *
 * Prints the port's stored MTU code converted to bytes (or -1 if the
 * stored code is not a valid enum ibta_mtu value).  A warning is logged
 * when the port is currently Ethernet, but the value is still printed.
 * Returns the number of characters written to buf.
 */
static ssize_t show_port_ib_mtu(struct device *dev,
			     struct device_attribute *attr,
			     char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_mtu_attr);
	struct mlx4_dev *mdev = info->dev;
	int bytes;

	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");

	bytes = ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]);

	return sprintf(buf, "%d\n", bytes);
}

/*
 * sysfs store handler for the per-port IB MTU attribute.
 *
 * buf must parse (kstrtoint, base auto-detected) to one of the MTU sizes
 * accepted by int_to_ibta_mtu().  The new code is stored for this port,
 * then all ports are closed and re-set and the device is re-registered.
 *
 * Returns count on success or a negative errno; -EINVAL for an Ethernet
 * port or an invalid MTU.
 */
static ssize_t set_port_ib_mtu(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_mtu_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	int rc, p, bytes, code;

	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
		return -EINVAL;
	}

	/* A parse failure and an unsupported size are reported alike */
	rc = kstrtoint(buf, 0, &bytes);
	code = rc ? -1 : int_to_ibta_mtu(bytes);
	if (code < 0) {
		mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf);
		return -EINVAL;
	}

	mdev->caps.port_ib_mtu[info->port] = code;

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	mlx4_unregister_device(mdev);

	for (p = 1; p <= mdev->caps.num_ports; p++) {
		mlx4_CLOSE_PORT(mdev, p);
		rc = mlx4_SET_PORT(mdev, p, -1);
		if (rc) {
			mlx4_err(mdev, "Failed to set port %d, aborting\n",
				 p);
			goto unlock;
		}
	}
	rc = mlx4_register_device(mdev);

unlock:
	mutex_unlock(&priv->port_mutex);
	mlx4_start_sense(mdev);

	if (rc)
		return rc;
	return count;
}

1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250
/*
 * Put the device into bonded mode.  Does nothing (and succeeds) when the
 * device is already bonded.  Serialized by priv->bond_mutex.
 *
 * Returns 0 on success or the error from mlx4_do_bond().
 */
int mlx4_bond(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int rc = 0;

	mutex_lock(&priv->bond_mutex);
	if (!mlx4_is_bonded(dev))
		rc = mlx4_do_bond(dev, true);
	mutex_unlock(&priv->bond_mutex);

	if (rc) {
		mlx4_err(dev, "Failed to bond device: %d\n", rc);
		return rc;
	}

	mlx4_dbg(dev, "Device is bonded\n");
	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_bond);

/*
 * Take the device out of bonded mode.  Does nothing (and succeeds) when
 * the device is not bonded.  Serialized by priv->bond_mutex.
 *
 * Returns 0 on success or the error from mlx4_do_bond().
 */
int mlx4_unbond(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int rc = 0;

	mutex_lock(&priv->bond_mutex);
	if (mlx4_is_bonded(dev))
		rc = mlx4_do_bond(dev, false);
	mutex_unlock(&priv->bond_mutex);

	if (rc) {
		mlx4_err(dev, "Failed to unbond device: %d\n", rc);
		return rc;
	}

	mlx4_dbg(dev, "Device is unbonded\n");
	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_unbond);


/*
 * Set the virtual-to-physical port mapping.
 *
 * A zero in v2p->port1 / v2p->port2 keeps the current mapping of that
 * port.  The mapping is changed through mlx4_virt2phy_port_map() only
 * when it differs from the one stored in priv->v2p, and the stored copy
 * is updated on success.  Serialized by priv->bond_mutex.
 *
 * Returns 0 on success, -ENOTSUPP when the device lacks
 * MLX4_DEV_CAP_FLAG2_PORT_REMAP, -EINVAL for an out-of-range or crossed
 * mapping, or the error from mlx4_virt2phy_port_map().
 */
int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u8 port1 = v2p->port1;
	u8 port2 = v2p->port2;
	int err;

	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
		return -ENOTSUPP;

	mutex_lock(&priv->bond_mutex);

	/* zero means keep current mapping for this port */
	if (port1 == 0)
		port1 = priv->v2p.port1;
	if (port2 == 0)
		port2 = priv->v2p.port2;

	/* besides boundary checks cross mapping makes
	 * no sense and therefore not allowed */
	if (port1 < 1 || port1 > MLX4_MAX_PORTS ||
	    port2 < 1 || port2 > MLX4_MAX_PORTS ||
	    (port1 == 2 && port2 == 1)) {
		err = -EINVAL;
		goto unlock;
	}

	/* Nothing to do when the mapping is unchanged */
	if (port1 == priv->v2p.port1 && port2 == priv->v2p.port2) {
		err = 0;
		goto unlock;
	}

	err = mlx4_virt2phy_port_map(dev, port1, port2);
	if (err) {
		mlx4_err(dev, "Failed to change port mape: %d\n", err);
		goto unlock;
	}

	mlx4_dbg(dev, "port map changed: [%d][%d]\n",
		 port1, port2);
	priv->v2p.port1 = port1;
	priv->v2p.port2 = port2;

unlock:
	mutex_unlock(&priv->bond_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(mlx4_port_map_set);

1251
static int mlx4_load_fw(struct mlx4_dev *dev)
1252 1253 1254 1255 1256
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;

	priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
1257
					 GFP_HIGHUSER | __GFP_NOWARN, 0);
1258
	if (!priv->fw.fw_icm) {
J
Joe Perches 已提交
1259
		mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
1260 1261 1262 1263 1264
		return -ENOMEM;
	}

	err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
	if (err) {
J
Joe Perches 已提交
1265
		mlx4_err(dev, "MAP_FA command failed, aborting\n");
1266 1267 1268 1269 1270
		goto err_free;
	}

	err = mlx4_RUN_FW(dev);
	if (err) {
J
Joe Perches 已提交
1271
		mlx4_err(dev, "RUN_FW command failed, aborting\n");
1272 1273 1274 1275 1276 1277 1278 1279 1280
		goto err_unmap_fa;
	}

	return 0;

err_unmap_fa:
	mlx4_UNMAP_FA(dev);

err_free:
1281
	mlx4_free_icm(dev, priv->fw.fw_icm, 0);
1282 1283 1284
	return err;
}

1285 1286
/*
 * Initialize the four cMPT ICM tables (QP, SRQ, CQ, EQ).
 *
 * Each table starts at cmpt_base plus a per-type offset of
 * (type * cmpt_entry_sz) << MLX4_CMPT_SHIFT.  On failure the tables
 * already initialized are cleaned up in reverse order.
 *
 * Returns 0 on success or the error from mlx4_init_icm_table().
 */
static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
				int cmpt_entry_sz)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 qp_base, srq_base, cq_base, eq_base;
	int num_eqs;
	int rc;

	qp_base = cmpt_base +
		  ((u64) (MLX4_CMPT_TYPE_QP * cmpt_entry_sz) << MLX4_CMPT_SHIFT);
	srq_base = cmpt_base +
		   ((u64) (MLX4_CMPT_TYPE_SRQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT);
	cq_base = cmpt_base +
		  ((u64) (MLX4_CMPT_TYPE_CQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT);
	eq_base = cmpt_base +
		  ((u64) (MLX4_CMPT_TYPE_EQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT);

	rc = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, qp_base,
				 cmpt_entry_sz, dev->caps.num_qps,
				 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				 0, 0);
	if (rc)
		return rc;

	rc = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, srq_base,
				 cmpt_entry_sz, dev->caps.num_srqs,
				 dev->caps.reserved_srqs, 0, 0);
	if (rc)
		goto undo_qp;

	rc = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, cq_base,
				 cmpt_entry_sz, dev->caps.num_cqs,
				 dev->caps.reserved_cqs, 0, 0);
	if (rc)
		goto undo_srq;

	/* All physical EQs are passed as both the table size and the reserved count */
	num_eqs = dev->phys_caps.num_phys_eqs;
	rc = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, eq_base,
				 cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
	if (rc)
		goto undo_cq;

	return 0;

undo_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);

undo_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);

undo_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

	return rc;
}

R
Roland Dreier 已提交
1344 1345
/*
 * Set up all ICM (HCA context memory) tables.
 *
 * Sequence: tell the firmware the ICM size, allocate and map the
 * auxiliary ICM area it asks for, then initialize the cMPT, EQ, MTT,
 * dMPT, QP, AUXC, ALTC, RDMARC, CQ, SRQ and MCG tables in that order.
 * On any failure everything initialized so far is torn down in reverse
 * order.
 *
 * Returns 0 on success, -ENOMEM if the aux area cannot be allocated, or
 * the error of the failing step.
 */
static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
			 struct mlx4_init_hca_param *init_hca, u64 icm_size)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 aux_pages;
	int fw_rsvd_qps;
	int mcg_entries;
	int num_eqs;
	int rc;

	rc = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
	if (rc) {
		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
		return rc;
	}

	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
		 (unsigned long long) icm_size >> 10,
		 (unsigned long long) aux_pages << 2);

	priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
					  GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.aux_icm) {
		mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
		return -ENOMEM;
	}

	rc = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
	if (rc) {
		mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
		goto free_aux;
	}

	rc = mlx4_init_cmpt_table(dev, init_hca->cmpt_base,
				  dev_cap->cmpt_entry_sz);
	if (rc) {
		mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
		goto unmap_aux;
	}

	num_eqs = dev->phys_caps.num_phys_eqs;
	rc = mlx4_init_icm_table(dev, &priv->eq_table.table,
				 init_hca->eqc_base, dev_cap->eqc_entry_sz,
				 num_eqs, num_eqs, 0, 0);
	if (rc) {
		mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
		goto cleanup_cmpt;
	}

	/*
	 * Reserved MTT entries must be aligned up to a cacheline
	 * boundary, since the FW will write to them, while the driver
	 * writes to all other MTT entries. (The variable
	 * dev->caps.mtt_entry_sz below is really the MTT segment
	 * size, not the raw entry size)
	 */
	dev->caps.reserved_mtts =
		ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
		      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;

	rc = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
				 init_hca->mtt_base,
				 dev->caps.mtt_entry_sz,
				 dev->caps.num_mtts,
				 dev->caps.reserved_mtts, 1, 0);
	if (rc) {
		mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
		goto cleanup_eq;
	}

	rc = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
				 init_hca->dmpt_base,
				 dev_cap->dmpt_entry_sz,
				 dev->caps.num_mpts,
				 dev->caps.reserved_mrws, 1, 1);
	if (rc) {
		mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
		goto cleanup_mtt;
	}

	/* The four QP-indexed tables below share size and reserved count */
	fw_rsvd_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];

	rc = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
				 init_hca->qpc_base,
				 dev_cap->qpc_entry_sz,
				 dev->caps.num_qps, fw_rsvd_qps,
				 0, 0);
	if (rc) {
		mlx4_err(dev, "Failed to map QP context memory, aborting\n");
		goto cleanup_dmpt;
	}

	rc = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
				 init_hca->auxc_base,
				 dev_cap->aux_entry_sz,
				 dev->caps.num_qps, fw_rsvd_qps,
				 0, 0);
	if (rc) {
		mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
		goto cleanup_qp;
	}

	rc = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
				 init_hca->altc_base,
				 dev_cap->altc_entry_sz,
				 dev->caps.num_qps, fw_rsvd_qps,
				 0, 0);
	if (rc) {
		mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
		goto cleanup_auxc;
	}

	rc = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
				 init_hca->rdmarc_base,
				 dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
				 dev->caps.num_qps, fw_rsvd_qps,
				 0, 0);
	if (rc) {
		mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
		goto cleanup_altc;
	}

	rc = mlx4_init_icm_table(dev, &priv->cq_table.table,
				 init_hca->cqc_base,
				 dev_cap->cqc_entry_sz,
				 dev->caps.num_cqs,
				 dev->caps.reserved_cqs, 0, 0);
	if (rc) {
		mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
		goto cleanup_rdmarc;
	}

	rc = mlx4_init_icm_table(dev, &priv->srq_table.table,
				 init_hca->srqc_base,
				 dev_cap->srq_entry_sz,
				 dev->caps.num_srqs,
				 dev->caps.reserved_srqs, 0, 0);
	if (rc) {
		mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
		goto cleanup_cq;
	}

	/*
	 * For flow steering device managed mode it is required to use
	 * mlx4_init_icm_table. For B0 steering mode it's not strictly
	 * required, but for simplicity just map the whole multicast
	 * group table now.  The table isn't very big and it's a lot
	 * easier than trying to track ref counts.
	 */
	mcg_entries = dev->caps.num_mgms + dev->caps.num_amgms;
	rc = mlx4_init_icm_table(dev, &priv->mcg_table.table,
				 init_hca->mc_base,
				 mlx4_get_mgm_entry_size(dev),
				 mcg_entries, mcg_entries,
				 0, 0);
	if (rc) {
		mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
		goto cleanup_srq;
	}

	return 0;

	/* Unwind in the reverse order of initialization */
cleanup_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);

cleanup_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);

cleanup_rdmarc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);

cleanup_altc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);

cleanup_auxc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);

cleanup_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);

cleanup_dmpt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);

cleanup_mtt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);

cleanup_eq:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);

cleanup_cmpt:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

unmap_aux:
	mlx4_UNMAP_ICM_AUX(dev);

free_aux:
	mlx4_free_icm(dev, priv->fw.aux_icm, 0);

	return rc;
}

/*
 * Tear down every ICM table set up by mlx4_init_icm(), in the reverse
 * order of their initialization, then unmap and free the auxiliary ICM
 * area.
 */
static void mlx4_free_icms(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	/* Listed in teardown order */
	struct mlx4_icm_table *tables[] = {
		&priv->mcg_table.table,
		&priv->srq_table.table,
		&priv->cq_table.table,
		&priv->qp_table.rdmarc_table,
		&priv->qp_table.altc_table,
		&priv->qp_table.auxc_table,
		&priv->qp_table.qp_table,
		&priv->mr_table.dmpt_table,
		&priv->mr_table.mtt_table,
		&priv->eq_table.table,
		&priv->eq_table.cmpt_table,
		&priv->cq_table.cmpt_table,
		&priv->srq_table.cmpt_table,
		&priv->qp_table.cmpt_table,
	};
	unsigned int i;

	for (i = 0; i < sizeof(tables) / sizeof(tables[0]); i++)
		mlx4_cleanup_icm_table(dev, tables[i]);

	mlx4_UNMAP_ICM_AUX(dev);
	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
}

1571 1572 1573 1574
static void mlx4_slave_exit(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

1575
	mutex_lock(&priv->cmd.slave_cmd_mutex);
1576 1577
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
			  MLX4_COMM_TIME))
J
Joe Perches 已提交
1578
		mlx4_warn(dev, "Failed to close slave function\n");
1579
	mutex_unlock(&priv->cmd.slave_cmd_mutex);
1580 1581
}

1582 1583 1584 1585 1586 1587 1588
static int map_bf_area(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	resource_size_t bf_start;
	resource_size_t bf_len;
	int err = 0;

1589 1590 1591
	if (!dev->caps.bf_reg_size)
		return -ENXIO;

1592
	bf_start = pci_resource_start(dev->persist->pdev, 2) +
1593
			(dev->caps.num_uars << PAGE_SHIFT);
1594
	bf_len = pci_resource_len(dev->persist->pdev, 2) -
1595
			(dev->caps.num_uars << PAGE_SHIFT);
1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608
	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
	if (!priv->bf_mapping)
		err = -ENOMEM;

	return err;
}

static void unmap_bf_area(struct mlx4_dev *dev)
{
	if (mlx4_priv(dev)->bf_mapping)
		io_mapping_free(mlx4_priv(dev)->bf_mapping);
}

1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630
cycle_t mlx4_read_clock(struct mlx4_dev *dev)
{
	u32 clockhi, clocklo, clockhi1;
	cycle_t cycles;
	int i;
	struct mlx4_priv *priv = mlx4_priv(dev);

	for (i = 0; i < 10; i++) {
		clockhi = swab32(readl(priv->clock_mapping));
		clocklo = swab32(readl(priv->clock_mapping + 4));
		clockhi1 = swab32(readl(priv->clock_mapping));
		if (clockhi == clockhi1)
			break;
	}

	cycles = (u64) clockhi << 32 | (u64) clocklo;

	return cycles;
}
EXPORT_SYMBOL_GPL(mlx4_read_clock);


1631 1632 1633 1634 1635
static int map_internal_clock(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	priv->clock_mapping =
1636 1637
		ioremap(pci_resource_start(dev->persist->pdev,
					   priv->fw.clock_bar) +
1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653
			priv->fw.clock_offset, MLX4_CLOCK_SIZE);

	if (!priv->clock_mapping)
		return -ENOMEM;

	return 0;
}

/* Undo map_internal_clock(); a missing mapping is ignored. */
static void unmap_internal_clock(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (!priv->clock_mapping)
		return;

	iounmap(priv->clock_mapping);
}

1654 1655
/*
 * Shut the HCA down: drop the clock and BlueFlame mappings, then either
 * reset the slave function, or (on a non-slave) issue CLOSE_HCA and free
 * the ICM tables.
 */
static void mlx4_close_hca(struct mlx4_dev *dev)
{
	unmap_internal_clock(dev);
	unmap_bf_area(dev);

	if (mlx4_is_slave(dev)) {
		mlx4_slave_exit(dev);
		return;
	}

	mlx4_CLOSE_HCA(dev, 0);
	mlx4_free_icms(dev);
}

/*
 * Unmap and free the firmware area set up by mlx4_load_fw().
 * Nothing is done on a slave function.
 */
static void mlx4_close_fw(struct mlx4_dev *dev)
{
	if (mlx4_is_slave(dev))
		return;

	mlx4_UNMAP_FA(dev);
	mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
}

1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717
/*
 * Poll the comm channel flags until the offline bit clears.
 *
 * The flags word is re-read every 100 msec for up to
 * MLX4_COMM_OFFLINE_TIME_OUT msec.
 *
 * Returns 0 once the offline bit is clear, -EIO on timeout.
 */
static int mlx4_comm_check_offline(struct mlx4_dev *dev)
{
#define COMM_CHAN_OFFLINE_OFFSET 0x09

	struct mlx4_priv *priv = mlx4_priv(dev);
	unsigned long deadline;
	u32 flags;

	deadline = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
	while (time_before(jiffies, deadline)) {
		flags = swab32(readl((__iomem char *)priv->mfunc.comm +
				     MLX4_COMM_CHAN_FLAGS));
		if (!(flags & (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)))
			return 0;
		/* There are cases as part of AER/Reset flow that PF needs
		 * around 100 msec to load. We therefore sleep for 100 msec
		 * to allow other tasks to make use of that CPU during this
		 * time interval.
		 */
		msleep(100);
	}

	mlx4_err(dev, "Communication channel is offline.\n");
	return -EIO;
}

/*
 * Read the comm channel capabilities word and, when its reset bit
 * (bit 0x1e) is set, record MLX4_VF_CAP_FLAG_RESET in dev->caps.vf_caps.
 */
static void mlx4_reset_vf_support(struct mlx4_dev *dev)
{
#define COMM_CHAN_RST_OFFSET 0x1e

	struct mlx4_priv *priv = mlx4_priv(dev);
	u32 caps_word;

	caps_word = swab32(readl((__iomem char *)priv->mfunc.comm +
				 MLX4_COMM_CHAN_CAPS));

	if (caps_word & (u32)(1 << COMM_CHAN_RST_OFFSET))
		dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
}

1718 1719 1720 1721 1722 1723 1724 1725
static int mlx4_init_slave(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 dma = (u64) priv->mfunc.vhcr_dma;
	int ret_from_reset = 0;
	u32 slave_read;
	u32 cmd_channel_ver;

1726
	if (atomic_read(&pf_loading)) {
J
Joe Perches 已提交
1727
		mlx4_warn(dev, "PF is not ready - Deferring probe\n");
1728 1729 1730
		return -EPROBE_DEFER;
	}

1731
	mutex_lock(&priv->cmd.slave_cmd_mutex);
1732
	priv->cmd.max_cmds = 1;
1733 1734 1735 1736 1737 1738
	if (mlx4_comm_check_offline(dev)) {
		mlx4_err(dev, "PF is not responsive, skipping initialization\n");
		goto err_offline;
	}

	mlx4_reset_vf_support(dev);
1739 1740
	mlx4_warn(dev, "Sending reset\n");
	ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
1741
				       MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
1742 1743 1744 1745
	/* if we are in the middle of flr the slave will try
	 * NUM_OF_RESET_RETRIES times before leaving.*/
	if (ret_from_reset) {
		if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
J
Joe Perches 已提交
1746
			mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
1747 1748
			mutex_unlock(&priv->cmd.slave_cmd_mutex);
			return -EPROBE_DEFER;
1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759
		} else
			goto err;
	}

	/* check the driver version - the slave I/F revision
	 * must match the master's */
	slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
	cmd_channel_ver = mlx4_comm_get_version();

	if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
		MLX4_COMM_GET_IF_REV(slave_read)) {
J
Joe Perches 已提交
1760
		mlx4_err(dev, "slave driver version is not supported by the master\n");
1761 1762 1763 1764 1765
		goto err;
	}

	mlx4_warn(dev, "Sending vhcr0\n");
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
1766
			     MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1767 1768
		goto err;
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
1769
			     MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1770 1771
		goto err;
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
1772
			     MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1773
		goto err;
1774 1775
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
			  MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1776
		goto err;
1777 1778

	mutex_unlock(&priv->cmd.slave_cmd_mutex);
1779 1780 1781
	return 0;

err:
1782
	mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
1783
err_offline:
1784
	mutex_unlock(&priv->cmd.slave_cmd_mutex);
1785
	return -EIO;
1786 1787
}

1788 1789 1790 1791 1792
static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
{
	int i;

	for (i = 1; i <= dev->caps.num_ports; i++) {
1793 1794
		if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
			dev->caps.gid_table_len[i] =
M
Matan Barak 已提交
1795
				mlx4_get_slave_num_gids(dev, 0, i);
1796 1797
		else
			dev->caps.gid_table_len[i] = 1;
1798 1799 1800 1801 1802
		dev->caps.pkey_table_len[i] =
			dev->phys_caps.pkey_phys_table_len[i] - 1;
	}
}

1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815
/*
 * Find the smallest log2 entry size in
 * [MLX4_MIN_MGM_LOG_ENTRY_SIZE, MLX4_MAX_MGM_LOG_ENTRY_SIZE] for which
 * qp_per_entry <= 4 * ((1 << size) / 16 - 2).
 *
 * Returns that log2 size, or -1 if no size in the range is large enough.
 */
static int choose_log_fs_mgm_entry_size(int qp_per_entry)
{
	int log_sz;

	for (log_sz = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
	     log_sz <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; log_sz++) {
		if (qp_per_entry <= 4 * ((1 << log_sz) / 16 - 2))
			return log_sz;
	}

	return -1;
}

1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840
/*
 * Return a human-readable description of a DMFS high rate steering mode
 * (one of the MLX4_STEERING_DMFS_A0_* values), or "Unrecognized mode"
 * for any other value.
 */
static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
{
	if (dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_DEFAULT)
		return "default performance";

	if (dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_DYNAMIC)
		return "dynamic hybrid mode";

	if (dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
		return "performance optimized for limited rule configuration (static)";

	if (dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_DISABLE)
		return "disabled performance optimized steering";

	if (dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
		return "performance optimized steering not supported";

	return "Unrecognized mode";
}

#define MLX4_DMFS_A0_STEERING			(1UL << 2)

1841 1842 1843
static void choose_steering_mode(struct mlx4_dev *dev,
				 struct mlx4_dev_cap *dev_cap)
{
1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855
	if (mlx4_log_num_mgm_entry_size <= 0) {
		if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
			if (dev->caps.dmfs_high_steer_mode ==
			    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
				mlx4_err(dev, "DMFS high rate mode not supported\n");
			else
				dev->caps.dmfs_high_steer_mode =
					MLX4_STEERING_DMFS_A0_STATIC;
		}
	}

	if (mlx4_log_num_mgm_entry_size <= 0 &&
1856
	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
1857
	    (!mlx4_is_mfunc(dev) ||
1858 1859
	     (dev_cap->fs_max_num_qp_per_entry >=
	     (dev->persist->num_vfs + 1))) &&
1860 1861 1862 1863
	    choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
		MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
		dev->oper_log_mgm_entry_size =
			choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
1864 1865 1866 1867 1868
		dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
		dev->caps.fs_log_max_ucast_qp_range_size =
			dev_cap->fs_log_max_ucast_qp_range_size;
	} else {
1869 1870 1871
		if (dev->caps.dmfs_high_steer_mode !=
		    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
			dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
1872 1873 1874 1875 1876 1877 1878 1879
		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
		    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
			dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
		else {
			dev->caps.steering_mode = MLX4_STEERING_MODE_A0;

			if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
			    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
J
Joe Perches 已提交
1880
				mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
1881
		}
1882 1883 1884 1885
		dev->oper_log_mgm_entry_size =
			mlx4_log_num_mgm_entry_size > 0 ?
			mlx4_log_num_mgm_entry_size :
			MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
1886 1887
		dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
	}
J
Joe Perches 已提交
1888
	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
1889 1890 1891
		 mlx4_steering_mode_str(dev->caps.steering_mode),
		 dev->oper_log_mgm_entry_size,
		 mlx4_log_num_mgm_entry_size);
1892 1893
}

1894 1895 1896 1897
static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
				       struct mlx4_dev_cap *dev_cap)
{
	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
1898
	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
1899 1900 1901 1902 1903 1904 1905 1906
		dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
	else
		dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;

	mlx4_dbg(dev, "Tunneling offload mode is: %s\n",  (dev->caps.tunnel_offload_mode
		 == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
}

1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935
/*
 * Compare the DMFS high-rate steering mode requested by the driver with the
 * per-port state reported by mlx4_dev_port().
 *
 * Returns -EINVAL when the mode is MLX4_STEERING_DMFS_A0_NOT_SUPPORTED and 0
 * otherwise.  Per-port query failures and mismatches are only logged; they
 * do not change the return value.
 */
static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
{
	struct mlx4_port_cap port_cap;
	int port;

	if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
		return -EINVAL;

	for (port = 1; port <= dev->caps.num_ports; port++) {
		if (mlx4_dev_port(dev, port, &port_cap)) {
			mlx4_err(dev,
				 "QUERY_DEV_CAP command failed, can't veify DMFS high rate steering.\n");
			continue;
		}

		/* The default mode accepts whatever the port reports. */
		if (dev->caps.dmfs_high_steer_mode ==
		    MLX4_STEERING_DMFS_A0_DEFAULT)
			continue;

		/*
		 * A mismatch is "optimized" reported while the driver asked
		 * for DISABLE, or "not optimized" reported while it asked for
		 * anything else.
		 */
		if (port_cap.dmfs_optimized_state !=
		    !!(dev->caps.dmfs_high_steer_mode ==
		       MLX4_STEERING_DMFS_A0_DISABLE))
			continue;

		mlx4_err(dev,
			 "DMFS high rate steer mode differ, driver requested %s but %s in FW.\n",
			 dmfs_high_rate_steering_mode_str(
				dev->caps.dmfs_high_steer_mode),
			 (port_cap.dmfs_optimized_state ?
				"enabled" : "disabled"));
	}

	return 0;
}

1936
static int mlx4_init_fw(struct mlx4_dev *dev)
1937
{
1938
	struct mlx4_mod_stat_cfg   mlx4_cfg;
1939
	int err = 0;
1940

1941 1942 1943 1944
	if (!mlx4_is_slave(dev)) {
		err = mlx4_QUERY_FW(dev);
		if (err) {
			if (err == -EACCES)
J
Joe Perches 已提交
1945
				mlx4_info(dev, "non-primary physical function, skipping\n");
1946
			else
J
Joe Perches 已提交
1947
				mlx4_err(dev, "QUERY_FW command failed, aborting\n");
1948
			return err;
1949
		}
1950

1951 1952
		err = mlx4_load_fw(dev);
		if (err) {
J
Joe Perches 已提交
1953
			mlx4_err(dev, "Failed to start FW, aborting\n");
1954
			return err;
1955
		}
1956

1957 1958 1959 1960 1961
		mlx4_cfg.log_pg_sz_m = 1;
		mlx4_cfg.log_pg_sz = 0;
		err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
		if (err)
			mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
1962
	}
1963

1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978
	return err;
}

static int mlx4_init_hca(struct mlx4_dev *dev)
{
	struct mlx4_priv	  *priv = mlx4_priv(dev);
	struct mlx4_adapter	   adapter;
	struct mlx4_dev_cap	   dev_cap;
	struct mlx4_profile	   profile;
	struct mlx4_init_hca_param init_hca;
	u64 icm_size;
	struct mlx4_config_dev_params params;
	int err;

	if (!mlx4_is_slave(dev)) {
1979 1980
		err = mlx4_dev_cap(dev, &dev_cap);
		if (err) {
J
Joe Perches 已提交
1981
			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
1982
			return err;
1983
		}
1984

1985
		choose_steering_mode(dev, &dev_cap);
1986
		choose_tunnel_offload_mode(dev, &dev_cap);
1987

1988 1989 1990 1991
		if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
		    mlx4_is_master(dev))
			dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;

1992 1993 1994 1995
		err = mlx4_get_phys_port_id(dev);
		if (err)
			mlx4_err(dev, "Fail to get physical port id\n");

1996 1997 1998
		if (mlx4_is_master(dev))
			mlx4_parav_master_pf_caps(dev);

1999 2000 2001 2002 2003 2004
		if (mlx4_low_memory_profile()) {
			mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
			profile = low_mem_profile;
		} else {
			profile = default_profile;
		}
2005 2006 2007
		if (dev->caps.steering_mode ==
		    MLX4_STEERING_MODE_DEVICE_MANAGED)
			profile.num_mcg = MLX4_FS_NUM_MCG;
2008

2009 2010 2011 2012
		icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
					     &init_hca);
		if ((long long) icm_size < 0) {
			err = icm_size;
2013
			return err;
2014
		}
2015

2016 2017
		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;

2018 2019
		init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
		init_hca.uar_page_sz = PAGE_SHIFT - 12;
2020 2021 2022 2023
		init_hca.mw_enabled = 0;
		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
		    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
			init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
2024

2025 2026
		err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
		if (err)
2027
			return err;
2028

2029 2030
		err = mlx4_INIT_HCA(dev, &init_hca);
		if (err) {
J
Joe Perches 已提交
2031
			mlx4_err(dev, "INIT_HCA command failed, aborting\n");
2032 2033
			goto err_free_icm;
		}
2034 2035 2036 2037 2038

		if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
			err = mlx4_query_func(dev, &dev_cap);
			if (err < 0) {
				mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
2039
				goto err_close;
2040 2041 2042 2043 2044 2045 2046
			} else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
				dev->caps.num_eqs = dev_cap.max_eqs;
				dev->caps.reserved_eqs = dev_cap.reserved_eqs;
				dev->caps.reserved_uars = dev_cap.reserved_uars;
			}
		}

2047 2048 2049 2050 2051 2052 2053 2054
		/*
		 * If TS is supported by FW
		 * read HCA frequency by QUERY_HCA command
		 */
		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
			memset(&init_hca, 0, sizeof(init_hca));
			err = mlx4_QUERY_HCA(dev, &init_hca);
			if (err) {
J
Joe Perches 已提交
2055
				mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n");
2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
			} else {
				dev->caps.hca_core_clock =
					init_hca.hca_core_clock;
			}

			/* In case we got HCA frequency 0 - disable timestamping
			 * to avoid dividing by zero
			 */
			if (!dev->caps.hca_core_clock) {
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
				mlx4_err(dev,
J
Joe Perches 已提交
2068
					 "HCA frequency is 0 - timestamping is not supported\n");
2069 2070 2071 2072 2073 2074
			} else if (map_internal_clock(dev)) {
				/*
				 * Map internal clock,
				 * in case of failure disable timestamping
				 */
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
J
Joe Perches 已提交
2075
				mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
2076 2077
			}
		}
2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095

		if (dev->caps.dmfs_high_steer_mode !=
		    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
			if (mlx4_validate_optimized_steering(dev))
				mlx4_warn(dev, "Optimized steering validation failed\n");

			if (dev->caps.dmfs_high_steer_mode ==
			    MLX4_STEERING_DMFS_A0_DISABLE) {
				dev->caps.dmfs_high_rate_qpn_base =
					dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
				dev->caps.dmfs_high_rate_qpn_range =
					MLX4_A0_STEERING_TABLE_SIZE;
			}

			mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n",
				 dmfs_high_rate_steering_mode_str(
					dev->caps.dmfs_high_steer_mode));
		}
2096 2097 2098
	} else {
		err = mlx4_init_slave(dev);
		if (err) {
2099 2100
			if (err != -EPROBE_DEFER)
				mlx4_err(dev, "Failed to initialize slave\n");
2101
			return err;
2102
		}
2103

2104 2105 2106 2107 2108
		err = mlx4_slave_cap(dev);
		if (err) {
			mlx4_err(dev, "Failed to obtain slave caps\n");
			goto err_close;
		}
2109 2110
	}

2111 2112 2113 2114 2115 2116 2117
	if (map_bf_area(dev))
		mlx4_dbg(dev, "Failed to map blue flame area\n");

	/*Only the master set the ports, all the rest got it from it.*/
	if (!mlx4_is_slave(dev))
		mlx4_set_port_mask(dev);

2118 2119
	err = mlx4_QUERY_ADAPTER(dev, &adapter);
	if (err) {
J
Joe Perches 已提交
2120
		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
2121
		goto unmap_bf;
2122 2123
	}

2124 2125 2126 2127 2128 2129 2130 2131
	/* Query CONFIG_DEV parameters */
	err = mlx4_config_dev_retrieval(dev, &params);
	if (err && err != -ENOTSUPP) {
		mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
	} else if (!err) {
		dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
		dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
	}
2132
	priv->eq_table.inta_pin = adapter.inta_pin;
2133
	memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
2134 2135 2136

	return 0;

2137
unmap_bf:
2138
	unmap_internal_clock(dev);
2139 2140
	unmap_bf_area(dev);

2141
	if (mlx4_is_slave(dev)) {
2142
		kfree(dev->caps.qp0_qkey);
2143 2144 2145 2146 2147 2148
		kfree(dev->caps.qp0_tunnel);
		kfree(dev->caps.qp0_proxy);
		kfree(dev->caps.qp1_tunnel);
		kfree(dev->caps.qp1_proxy);
	}

2149
err_close:
2150 2151 2152 2153
	if (mlx4_is_slave(dev))
		mlx4_slave_exit(dev);
	else
		mlx4_CLOSE_HCA(dev, 0);
2154 2155

err_free_icm:
2156 2157
	if (!mlx4_is_slave(dev))
		mlx4_free_icms(dev);
2158 2159 2160 2161

	return err;
}

2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178
/*
 * Set up the counters bitmap with dev->caps.max_counters entries.
 *
 * Returns -ENOENT when the device does not advertise
 * MLX4_DEV_CAP_FLAG_COUNTERS, otherwise the result of mlx4_bitmap_init().
 */
static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *mpriv = mlx4_priv(dev);
	int num_counters;

	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) {
		num_counters = dev->caps.max_counters;
		return mlx4_bitmap_init(&mpriv->counters_bitmap, num_counters,
					num_counters - 1, 0, 0);
	}

	return -ENOENT;
}

static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
{
	mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
}

2179
/*
 * Allocate one counter index directly from the local counters bitmap.
 *
 * @dev: device to allocate from
 * @idx: receives the allocated index
 *
 * Returns 0 on success, -ENOENT when the device does not advertise
 * MLX4_DEV_CAP_FLAG_COUNTERS, or -ENOMEM when the bitmap allocator hands
 * back -1 (in which case *idx holds that -1 value).
 */
int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
		return -ENOENT;

	*idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
	if (*idx == -1)
		return -ENOMEM;

	return 0;
}
2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208

/*
 * Allocate a counter index.
 *
 * On a multi-function device the request is sent as a wrapped ALLOC_RES
 * command and *idx is filled from the command's output only on success.
 * Otherwise the index comes straight from __mlx4_counter_alloc().
 *
 * Returns 0 on success or the error from whichever path was taken.
 */
int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
{
	u64 out_param;
	int err;

	if (!mlx4_is_mfunc(dev))
		return __mlx4_counter_alloc(dev, idx);

	err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
			   RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
			   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
	if (err)
		return err;

	*idx = get_param_l(&out_param);
	return 0;
}
2209 2210
EXPORT_SYMBOL_GPL(mlx4_counter_alloc);

2211
/*
 * Return counter index @idx to the local counters bitmap, passing
 * MLX4_USE_RR to the bitmap allocator.
 */
void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
{
	mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
}
2216 2217 2218

/*
 * Free counter index @idx.
 *
 * On a multi-function device the release is sent as a wrapped FREE_RES
 * command whose result is not checked; otherwise the index goes straight
 * back to the local bitmap through __mlx4_counter_free().
 */
void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
{
	u64 in_param = 0;

	if (mlx4_is_mfunc(dev)) {
		set_param_l(&in_param, idx);
		mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
			 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
			 MLX4_CMD_WRAPPED);
		return;
	}
	__mlx4_counter_free(dev, idx);
}
2230 2231
EXPORT_SYMBOL_GPL(mlx4_counter_free);

R
Roland Dreier 已提交
2232
static int mlx4_setup_hca(struct mlx4_dev *dev)
2233 2234 2235
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;
2236
	int port;
2237
	__be32 ib_port_default_caps;
2238 2239 2240

	err = mlx4_init_uar_table(dev);
	if (err) {
J
Joe Perches 已提交
2241 2242
		mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
		 return err;
2243 2244 2245 2246
	}

	err = mlx4_uar_alloc(dev, &priv->driver_uar);
	if (err) {
J
Joe Perches 已提交
2247
		mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
2248 2249 2250
		goto err_uar_table_free;
	}

2251
	priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2252
	if (!priv->kar) {
J
Joe Perches 已提交
2253
		mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
2254 2255 2256 2257 2258 2259
		err = -ENOMEM;
		goto err_uar_free;
	}

	err = mlx4_init_pd_table(dev);
	if (err) {
J
Joe Perches 已提交
2260
		mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
2261 2262 2263
		goto err_kar_unmap;
	}

S
Sean Hefty 已提交
2264 2265
	err = mlx4_init_xrcd_table(dev);
	if (err) {
J
Joe Perches 已提交
2266
		mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
S
Sean Hefty 已提交
2267 2268 2269
		goto err_pd_table_free;
	}

2270 2271
	err = mlx4_init_mr_table(dev);
	if (err) {
J
Joe Perches 已提交
2272
		mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
S
Sean Hefty 已提交
2273
		goto err_xrcd_table_free;
2274 2275
	}

2276 2277 2278
	if (!mlx4_is_slave(dev)) {
		err = mlx4_init_mcg_table(dev);
		if (err) {
J
Joe Perches 已提交
2279
			mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
2280 2281
			goto err_mr_table_free;
		}
2282 2283 2284 2285 2286
		err = mlx4_config_mad_demux(dev);
		if (err) {
			mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
			goto err_mcg_table_free;
		}
2287 2288
	}

2289 2290
	err = mlx4_init_eq_table(dev);
	if (err) {
J
Joe Perches 已提交
2291
		mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
2292
		goto err_mcg_table_free;
2293 2294 2295 2296
	}

	err = mlx4_cmd_use_events(dev);
	if (err) {
J
Joe Perches 已提交
2297
		mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
2298 2299 2300 2301 2302
		goto err_eq_table_free;
	}

	err = mlx4_NOP(dev);
	if (err) {
2303
		if (dev->flags & MLX4_FLAG_MSI_X) {
J
Joe Perches 已提交
2304
			mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n",
2305
				  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
J
Joe Perches 已提交
2306
			mlx4_warn(dev, "Trying again without MSI-X\n");
2307
		} else {
J
Joe Perches 已提交
2308
			mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
2309
				 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
2310
			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2311
		}
2312 2313 2314 2315 2316 2317 2318 2319

		goto err_cmd_poll;
	}

	mlx4_dbg(dev, "NOP command IRQ test passed\n");

	err = mlx4_init_cq_table(dev);
	if (err) {
J
Joe Perches 已提交
2320
		mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
2321 2322 2323 2324 2325
		goto err_cmd_poll;
	}

	err = mlx4_init_srq_table(dev);
	if (err) {
J
Joe Perches 已提交
2326
		mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
2327 2328 2329 2330 2331
		goto err_cq_table_free;
	}

	err = mlx4_init_qp_table(dev);
	if (err) {
J
Joe Perches 已提交
2332
		mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
2333 2334 2335
		goto err_srq_table_free;
	}

2336 2337
	err = mlx4_init_counters_table(dev);
	if (err && err != -ENOENT) {
J
Joe Perches 已提交
2338
		mlx4_err(dev, "Failed to initialize counters table, aborting\n");
2339
		goto err_qp_table_free;
2340 2341
	}

2342 2343 2344 2345 2346 2347
	if (!mlx4_is_slave(dev)) {
		for (port = 1; port <= dev->caps.num_ports; port++) {
			ib_port_default_caps = 0;
			err = mlx4_get_port_ib_caps(dev, port,
						    &ib_port_default_caps);
			if (err)
J
Joe Perches 已提交
2348 2349
				mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
					  port, err);
2350 2351
			dev->caps.ib_port_def_cap[port] = ib_port_default_caps;

2352 2353 2354 2355 2356 2357 2358
			/* initialize per-slave default ib port capabilities */
			if (mlx4_is_master(dev)) {
				int i;
				for (i = 0; i < dev->num_slaves; i++) {
					if (i == mlx4_master_func_num(dev))
						continue;
					priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
J
Joe Perches 已提交
2359
						ib_port_default_caps;
2360 2361 2362
				}
			}

2363 2364 2365 2366
			if (mlx4_is_mfunc(dev))
				dev->caps.port_ib_mtu[port] = IB_MTU_2048;
			else
				dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2367

2368 2369
			err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
					    dev->caps.pkey_table_len[port] : -1);
2370 2371
			if (err) {
				mlx4_err(dev, "Failed to set port %d, aborting\n",
J
Joe Perches 已提交
2372
					 port);
2373 2374
				goto err_counters_table_free;
			}
2375 2376 2377
		}
	}

2378 2379
	return 0;

2380 2381 2382
err_counters_table_free:
	mlx4_cleanup_counters_table(dev);

2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397
err_qp_table_free:
	mlx4_cleanup_qp_table(dev);

err_srq_table_free:
	mlx4_cleanup_srq_table(dev);

err_cq_table_free:
	mlx4_cleanup_cq_table(dev);

err_cmd_poll:
	mlx4_cmd_use_polling(dev);

err_eq_table_free:
	mlx4_cleanup_eq_table(dev);

2398 2399 2400 2401
err_mcg_table_free:
	if (!mlx4_is_slave(dev))
		mlx4_cleanup_mcg_table(dev);

2402
err_mr_table_free:
2403 2404
	mlx4_cleanup_mr_table(dev);

S
Sean Hefty 已提交
2405 2406 2407
err_xrcd_table_free:
	mlx4_cleanup_xrcd_table(dev);

2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421
err_pd_table_free:
	mlx4_cleanup_pd_table(dev);

err_kar_unmap:
	iounmap(priv->kar);

err_uar_free:
	mlx4_uar_free(dev, &priv->driver_uar);

err_uar_table_free:
	mlx4_cleanup_uar_table(dev);
	return err;
}

2422
static void mlx4_enable_msi_x(struct mlx4_dev *dev)
2423 2424
{
	struct mlx4_priv *priv = mlx4_priv(dev);
2425
	struct msix_entry *entries;
2426 2427 2428
	int i;

	if (msi_x) {
2429 2430
		int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ;

2431 2432
		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
			     nreq);
2433

2434 2435 2436 2437 2438
		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
		if (!entries)
			goto no_msi;

		for (i = 0; i < nreq; ++i)
2439 2440
			entries[i].entry = i;

2441 2442
		nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
					     nreq);
2443 2444

		if (nreq < 0) {
2445
			kfree(entries);
2446
			goto no_msi;
2447
		} else if (nreq < MSIX_LEGACY_SZ +
J
Joe Perches 已提交
2448
			   dev->caps.num_ports * MIN_MSIX_P_PORT) {
2449 2450 2451 2452 2453 2454 2455
			/*Working in legacy mode , all EQ's shared*/
			dev->caps.comp_pool           = 0;
			dev->caps.num_comp_vectors = nreq - 1;
		} else {
			dev->caps.comp_pool           = nreq - MSIX_LEGACY_SZ;
			dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
		}
2456
		for (i = 0; i < nreq; ++i)
2457 2458 2459
			priv->eq_table.eq[i].irq = entries[i].vector;

		dev->flags |= MLX4_FLAG_MSI_X;
2460 2461

		kfree(entries);
2462 2463 2464 2465
		return;
	}

no_msi:
2466
	dev->caps.num_comp_vectors = 1;
2467
	dev->caps.comp_pool	   = 0;
2468 2469

	for (i = 0; i < 2; ++i)
2470
		priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
2471 2472
}

2473
static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
2474 2475
{
	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
2476
	int err = 0;
2477 2478 2479

	info->dev = dev;
	info->port = port;
2480 2481 2482
	if (!mlx4_is_slave(dev)) {
		mlx4_init_mac_table(dev, &info->mac_table);
		mlx4_init_vlan_table(dev, &info->vlan_table);
2483
		mlx4_init_roce_gid_table(dev, &info->gid_table);
2484
		info->base_qpn = mlx4_get_base_qpn(dev, port);
2485
	}
2486 2487 2488

	sprintf(info->dev_name, "mlx4_port%d", port);
	info->port_attr.attr.name = info->dev_name;
2489 2490 2491 2492 2493 2494
	if (mlx4_is_mfunc(dev))
		info->port_attr.attr.mode = S_IRUGO;
	else {
		info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
		info->port_attr.store     = set_port_type;
	}
2495
	info->port_attr.show      = show_port_type;
2496
	sysfs_attr_init(&info->port_attr.attr);
2497

2498
	err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
2499 2500 2501 2502 2503
	if (err) {
		mlx4_err(dev, "Failed to create file for port %d\n", port);
		info->port = -1;
	}

2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514
	sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
	info->port_mtu_attr.attr.name = info->dev_mtu_name;
	if (mlx4_is_mfunc(dev))
		info->port_mtu_attr.attr.mode = S_IRUGO;
	else {
		info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
		info->port_mtu_attr.store     = set_port_ib_mtu;
	}
	info->port_mtu_attr.show      = show_port_ib_mtu;
	sysfs_attr_init(&info->port_mtu_attr.attr);

2515 2516
	err = device_create_file(&dev->persist->pdev->dev,
				 &info->port_mtu_attr);
2517 2518
	if (err) {
		mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
2519 2520
		device_remove_file(&info->dev->persist->pdev->dev,
				   &info->port_attr);
2521 2522 2523
		info->port = -1;
	}

2524 2525 2526 2527 2528 2529 2530 2531
	return err;
}

/*
 * Remove the two sysfs attributes registered by mlx4_init_port_info().
 * A negative info->port marks a port whose initialization failed; nothing
 * is removed for it.
 */
static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
{
	if (info->port < 0)
		return;

	device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
	device_remove_file(&info->dev->persist->pdev->dev,
			   &info->port_mtu_attr);
}

2537 2538 2539 2540 2541 2542 2543 2544 2545 2546
static int mlx4_init_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int num_entries = dev->caps.num_ports;
	int i, j;

	priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
	if (!priv->steer)
		return -ENOMEM;

2547
	for (i = 0; i < num_entries; i++)
2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587
		for (j = 0; j < MLX4_NUM_STEERS; j++) {
			INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
			INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
		}
	return 0;
}

/*
 * Free every promiscuous QP and steering entry (including each entry's
 * duplicates list) hanging off priv->steer, for all ports and all
 * MLX4_NUM_STEERS slots, then free the priv->steer array itself.
 */
static void mlx4_clear_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_steer_index *idx, *idx_next;
	struct mlx4_promisc_qp *qp, *qp_next;
	int nports = dev->caps.num_ports;
	int port, type;

	for (port = 0; port < nports; port++) {
		for (type = 0; type < MLX4_NUM_STEERS; type++) {
			/* Promiscuous QPs registered for this port/slot. */
			list_for_each_entry_safe(qp, qp_next,
						 &priv->steer[port].promisc_qps[type],
						 list) {
				list_del(&qp->list);
				kfree(qp);
			}

			/* Steering entries, each with its own duplicates. */
			list_for_each_entry_safe(idx, idx_next,
						 &priv->steer[port].steer_entries[type],
						 list) {
				list_del(&idx->list);
				list_for_each_entry_safe(qp, qp_next,
							 &idx->duplicates,
							 list) {
					list_del(&qp->list);
					kfree(qp);
				}
				kfree(idx);
			}
		}
	}

	kfree(priv->steer);
}

2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600
/* Combine the PCI slot and function of @pdev into slot * 8 + function. */
static int extended_func_num(struct pci_dev *pdev)
{
	const int slot = PCI_SLOT(pdev->devfn);
	const int func = PCI_FUNC(pdev->devfn);

	return slot * 8 + func;
}

#define MLX4_OWNER_BASE	0x8069c
#define MLX4_OWNER_SIZE	4

static int mlx4_get_ownership(struct mlx4_dev *dev)
{
	void __iomem *owner;
	u32 ret;

2601
	if (pci_channel_offline(dev->persist->pdev))
2602 2603
		return -EIO;

2604 2605
	owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
			MLX4_OWNER_BASE,
2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620
			MLX4_OWNER_SIZE);
	if (!owner) {
		mlx4_err(dev, "Failed to obtain ownership bit\n");
		return -ENOMEM;
	}

	ret = readl(owner);
	iounmap(owner);
	return (int) !!ret;
}

static void mlx4_free_ownership(struct mlx4_dev *dev)
{
	void __iomem *owner;

2621
	if (pci_channel_offline(dev->persist->pdev))
2622 2623
		return;

2624 2625
	owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
			MLX4_OWNER_BASE,
2626 2627 2628 2629 2630 2631 2632 2633 2634 2635
			MLX4_OWNER_SIZE);
	if (!owner) {
		mlx4_err(dev, "Failed to obtain ownership bit\n");
		return;
	}
	writel(0, owner);
	msleep(1000);
	iounmap(owner);
}

2636 2637 2638 2639
/*
 * True when MLX4_FLAG_SRIOV and MLX4_FLAG_MASTER are either both set or both
 * clear in @flags.
 */
#define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV)	==\
				  !!((flags) & MLX4_FLAG_MASTER))

static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
2640
			     u8 total_vfs, int existing_vfs, int reset_flow)
2641 2642
{
	u64 dev_flags = dev->flags;
2643
	int err = 0;
2644

2645 2646 2647 2648 2649 2650 2651 2652
	if (reset_flow) {
		dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
				       GFP_KERNEL);
		if (!dev->dev_vfs)
			goto free_mem;
		return dev_flags;
	}

2653 2654 2655 2656 2657 2658 2659 2660 2661 2662
	atomic_inc(&pf_loading);
	if (dev->flags &  MLX4_FLAG_SRIOV) {
		if (existing_vfs != total_vfs) {
			mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
				 existing_vfs, total_vfs);
			total_vfs = existing_vfs;
		}
	}

	dev->dev_vfs = kzalloc(total_vfs * sizeof(*dev->dev_vfs), GFP_KERNEL);
2663 2664 2665
	if (NULL == dev->dev_vfs) {
		mlx4_err(dev, "Failed to allocate memory for VFs\n");
		goto disable_sriov;
2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680
	}

	if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
		mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
		err = pci_enable_sriov(pdev, total_vfs);
	}
	if (err) {
		mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
			 err);
		goto disable_sriov;
	} else {
		mlx4_warn(dev, "Running in master mode\n");
		dev_flags |= MLX4_FLAG_SRIOV |
			MLX4_FLAG_MASTER;
		dev_flags &= ~MLX4_FLAG_SLAVE;
2681
		dev->persist->num_vfs = total_vfs;
2682 2683 2684 2685
	}
	return dev_flags;

disable_sriov:
2686
	atomic_dec(&pf_loading);
2687
free_mem:
2688
	dev->persist->num_vfs = 0;
2689 2690 2691 2692
	kfree(dev->dev_vfs);
	return dev_flags & ~MLX4_FLAG_MASTER;
}

2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710
/* Result of mlx4_check_dev_cap() when the requested VF count is rejected. */
enum {
	MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
};

/*
 * Check the requested VF count against the queried device capabilities.
 *
 * @nvfs holds three counts that are summed.  If the firmware does not
 * advertise MLX4_DEV_CAP_FLAG2_80_VFS (a CX2 limitation) and the sum is 64
 * or more, an error is logged and MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 is
 * returned; otherwise 0.
 */
static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
			      int *nvfs)
{
	int vf_total = nvfs[0] + nvfs[1] + nvfs[2];

	if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS)
		return 0;

	if (vf_total < 64)
		return 0;

	mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
		 vf_total);
	return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
}

2711
static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
2712 2713
			 int total_vfs, int *nvfs, struct mlx4_priv *priv,
			 int reset_flow)
2714 2715
{
	struct mlx4_dev *dev;
2716
	unsigned sum = 0;
2717
	int err;
2718
	int port;
2719
	int i;
2720
	struct mlx4_dev_cap *dev_cap = NULL;
2721
	int existing_vfs = 0;
2722

2723
	dev = &priv->dev;
2724

2725 2726
	INIT_LIST_HEAD(&priv->ctx_list);
	spin_lock_init(&priv->ctx_lock);
2727

2728
	mutex_init(&priv->port_mutex);
2729
	mutex_init(&priv->bond_mutex);
2730

2731 2732 2733
	INIT_LIST_HEAD(&priv->pgdir_list);
	mutex_init(&priv->pgdir_mutex);

2734 2735 2736
	INIT_LIST_HEAD(&priv->bf_list);
	mutex_init(&priv->bf_mutex);

S
Sergei Shtylyov 已提交
2737
	dev->rev_id = pdev->revision;
2738
	dev->numa_node = dev_to_node(&pdev->dev);
2739

2740
	/* Detect if this device is a virtual function */
2741
	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
2742 2743 2744 2745 2746 2747 2748 2749 2750
		mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
		dev->flags |= MLX4_FLAG_SLAVE;
	} else {
		/* We reset the device and enable SRIOV only for physical
		 * devices.  Try to claim ownership on the device;
		 * if already taken, skip -- do not allow multiple PFs */
		err = mlx4_get_ownership(dev);
		if (err) {
			if (err < 0)
2751
				return err;
2752
			else {
J
Joe Perches 已提交
2753
				mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
2754
				return -EINVAL;
2755 2756
			}
		}
S
Sergei Shtylyov 已提交
2757

2758 2759 2760
		atomic_set(&priv->opreq_count, 0);
		INIT_WORK(&priv->opreq_task, mlx4_opreq_action);

2761 2762 2763 2764 2765 2766 2767
		/*
		 * Now reset the HCA before we touch the PCI capabilities or
		 * attempt a firmware command, since a boot ROM may have left
		 * the HCA in an undefined state.
		 */
		err = mlx4_reset(dev);
		if (err) {
J
Joe Perches 已提交
2768
			mlx4_err(dev, "Failed to reset HCA, aborting\n");
2769
			goto err_sriov;
2770
		}
2771 2772 2773

		if (total_vfs) {
			dev->flags = MLX4_FLAG_MASTER;
2774 2775 2776
			existing_vfs = pci_num_vf(pdev);
			if (existing_vfs)
				dev->flags |= MLX4_FLAG_SRIOV;
2777
			dev->persist->num_vfs = total_vfs;
2778
		}
2779 2780
	}

2781 2782 2783 2784 2785
	/* on load remove any previous indication of internal error,
	 * device is up.
	 */
	dev->persist->state = MLX4_DEVICE_STATE_UP;

2786
slave_start:
2787 2788
	err = mlx4_cmd_init(dev);
	if (err) {
J
Joe Perches 已提交
2789
		mlx4_err(dev, "Failed to init command interface, aborting\n");
2790 2791 2792 2793 2794 2795 2796
		goto err_sriov;
	}

	/* In slave functions, the communication channel must be initialized
	 * before posting commands. Also, init num_slaves before calling
	 * mlx4_init_hca */
	if (mlx4_is_mfunc(dev)) {
2797
		if (mlx4_is_master(dev)) {
2798
			dev->num_slaves = MLX4_MAX_NUM_SLAVES;
2799 2800

		} else {
2801
			dev->num_slaves = 0;
2802 2803
			err = mlx4_multi_func_init(dev);
			if (err) {
J
Joe Perches 已提交
2804
				mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
2805 2806 2807
				goto err_cmd;
			}
		}
2808 2809
	}

2810 2811 2812 2813 2814 2815
	err = mlx4_init_fw(dev);
	if (err) {
		mlx4_err(dev, "Failed to init fw, aborting.\n");
		goto err_mfunc;
	}

2816
	if (mlx4_is_master(dev)) {
2817
		/* when we hit the goto slave_start below, dev_cap already initialized */
2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831
		if (!dev_cap) {
			dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);

			if (!dev_cap) {
				err = -ENOMEM;
				goto err_fw;
			}

			err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
			if (err) {
				mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
				goto err_fw;
			}

2832 2833 2834
			if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
				goto err_fw;

2835
			if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
2836 2837 2838 2839
				u64 dev_flags = mlx4_enable_sriov(dev, pdev,
								  total_vfs,
								  existing_vfs,
								  reset_flow);
2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864

				mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
				dev->flags = dev_flags;
				if (!SRIOV_VALID_STATE(dev->flags)) {
					mlx4_err(dev, "Invalid SRIOV state\n");
					goto err_sriov;
				}
				err = mlx4_reset(dev);
				if (err) {
					mlx4_err(dev, "Failed to reset HCA, aborting.\n");
					goto err_sriov;
				}
				goto slave_start;
			}
		} else {
			/* Legacy mode FW requires SRIOV to be enabled before
			 * doing QUERY_DEV_CAP, since max_eq's value is different if
			 * SRIOV is enabled.
			 */
			memset(dev_cap, 0, sizeof(*dev_cap));
			err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
			if (err) {
				mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
				goto err_fw;
			}
2865 2866 2867

			if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
				goto err_fw;
2868 2869 2870
		}
	}

2871
	err = mlx4_init_hca(dev);
2872 2873 2874 2875
	if (err) {
		if (err == -EACCES) {
			/* Not primary Physical function
			 * Running in slave mode */
2876
			mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
2877 2878 2879 2880
			/* We're not a PF */
			if (dev->flags & MLX4_FLAG_SRIOV) {
				if (!existing_vfs)
					pci_disable_sriov(pdev);
2881
				if (mlx4_is_master(dev) && !reset_flow)
2882 2883 2884 2885 2886
					atomic_dec(&pf_loading);
				dev->flags &= ~MLX4_FLAG_SRIOV;
			}
			if (!mlx4_is_slave(dev))
				mlx4_free_ownership(dev);
2887 2888 2889 2890
			dev->flags |= MLX4_FLAG_SLAVE;
			dev->flags &= ~MLX4_FLAG_MASTER;
			goto slave_start;
		} else
2891
			goto err_fw;
2892 2893
	}

2894
	if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
2895 2896
		u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
						  existing_vfs, reset_flow);
2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918

		if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
			mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
			dev->flags = dev_flags;
			err = mlx4_cmd_init(dev);
			if (err) {
				/* Only VHCR is cleaned up, so could still
				 * send FW commands
				 */
				mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
				goto err_close;
			}
		} else {
			dev->flags = dev_flags;
		}

		if (!SRIOV_VALID_STATE(dev->flags)) {
			mlx4_err(dev, "Invalid SRIOV state\n");
			goto err_close;
		}
	}

2919 2920 2921 2922
	/* check if the device is functioning at its maximum possible speed.
	 * No return code for this call, just warn the user in case of PCI
	 * express device capabilities are under-satisfied by the bus.
	 */
2923 2924
	if (!mlx4_is_slave(dev))
		mlx4_check_pcie_caps(dev);
2925

2926 2927 2928
	/* In master functions, the communication channel must be initialized
	 * after obtaining its address from fw */
	if (mlx4_is_master(dev)) {
2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946
		int ib_ports = 0;

		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
			ib_ports++;

		if (ib_ports &&
		    (num_vfs_argc > 1 || probe_vfs_argc > 1)) {
			mlx4_err(dev,
				 "Invalid syntax of num_vfs/probe_vfs with IB port - single port VFs syntax is only supported when all ports are configured as ethernet\n");
			err = -EINVAL;
			goto err_close;
		}
		if (dev->caps.num_ports < 2 &&
		    num_vfs_argc > 1) {
			err = -EINVAL;
			mlx4_err(dev,
				 "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
				 dev->caps.num_ports);
2947 2948
			goto err_close;
		}
2949
		memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
2950

2951 2952 2953
		for (i = 0;
		     i < sizeof(dev->persist->nvfs)/
		     sizeof(dev->persist->nvfs[0]); i++) {
2954 2955
			unsigned j;

2956
			for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
2957 2958 2959
				dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
				dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
					dev->caps.num_ports;
2960 2961
			}
		}
2962 2963 2964 2965 2966 2967 2968 2969 2970

		/* In master functions, the communication channel
		 * must be initialized after obtaining its address from fw
		 */
		err = mlx4_multi_func_init(dev);
		if (err) {
			mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
			goto err_close;
		}
2971
	}
2972

2973 2974
	err = mlx4_alloc_eq_table(dev);
	if (err)
2975
		goto err_master_mfunc;
2976

2977
	priv->msix_ctl.pool_bm = 0;
2978
	mutex_init(&priv->msix_ctl.pool_lock);
2979

2980
	mlx4_enable_msi_x(dev);
2981 2982
	if ((mlx4_is_mfunc(dev)) &&
	    !(dev->flags & MLX4_FLAG_MSI_X)) {
2983
		err = -ENOSYS;
J
Joe Perches 已提交
2984
		mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
2985
		goto err_free_eq;
2986 2987 2988 2989 2990
	}

	if (!mlx4_is_slave(dev)) {
		err = mlx4_init_steering(dev);
		if (err)
2991
			goto err_disable_msix;
2992
	}
2993

2994
	err = mlx4_setup_hca(dev);
2995 2996
	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
	    !mlx4_is_mfunc(dev)) {
2997
		dev->flags &= ~MLX4_FLAG_MSI_X;
2998 2999
		dev->caps.num_comp_vectors = 1;
		dev->caps.comp_pool	   = 0;
3000 3001 3002 3003
		pci_disable_msix(pdev);
		err = mlx4_setup_hca(dev);
	}

3004
	if (err)
3005
		goto err_steer;
3006

3007
	mlx4_init_quotas(dev);
3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018
	/* When PF resources are ready arm its comm channel to enable
	 * getting commands
	 */
	if (mlx4_is_master(dev)) {
		err = mlx4_ARM_COMM_CHANNEL(dev);
		if (err) {
			mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
				 err);
			goto err_steer;
		}
	}
3019

3020 3021 3022 3023 3024
	for (port = 1; port <= dev->caps.num_ports; port++) {
		err = mlx4_init_port_info(dev, port);
		if (err)
			goto err_port;
	}
3025

3026 3027 3028
	priv->v2p.port1 = 1;
	priv->v2p.port2 = 2;

3029 3030
	err = mlx4_register_device(dev);
	if (err)
3031
		goto err_port;
3032

3033 3034
	mlx4_request_modules(dev);

3035 3036 3037
	mlx4_sense_init(dev);
	mlx4_start_sense(dev);

3038
	priv->removed = 0;
3039

3040
	if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3041 3042
		atomic_dec(&pf_loading);

3043
	kfree(dev_cap);
3044 3045
	return 0;

3046
err_port:
3047
	for (--port; port >= 1; --port)
3048 3049
		mlx4_cleanup_port_info(&priv->port[port]);

3050
	mlx4_cleanup_counters_table(dev);
3051 3052 3053 3054 3055
	mlx4_cleanup_qp_table(dev);
	mlx4_cleanup_srq_table(dev);
	mlx4_cleanup_cq_table(dev);
	mlx4_cmd_use_polling(dev);
	mlx4_cleanup_eq_table(dev);
3056
	mlx4_cleanup_mcg_table(dev);
3057
	mlx4_cleanup_mr_table(dev);
S
Sean Hefty 已提交
3058
	mlx4_cleanup_xrcd_table(dev);
3059 3060 3061
	mlx4_cleanup_pd_table(dev);
	mlx4_cleanup_uar_table(dev);

3062
err_steer:
3063 3064
	if (!mlx4_is_slave(dev))
		mlx4_clear_steering(dev);
3065

3066 3067 3068 3069
err_disable_msix:
	if (dev->flags & MLX4_FLAG_MSI_X)
		pci_disable_msix(pdev);

3070 3071 3072
err_free_eq:
	mlx4_free_eq_table(dev);

3073
err_master_mfunc:
3074 3075
	if (mlx4_is_master(dev)) {
		mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
3076
		mlx4_multi_func_cleanup(dev);
3077
	}
3078

3079
	if (mlx4_is_slave(dev)) {
3080
		kfree(dev->caps.qp0_qkey);
3081 3082 3083 3084 3085 3086
		kfree(dev->caps.qp0_tunnel);
		kfree(dev->caps.qp0_proxy);
		kfree(dev->caps.qp1_tunnel);
		kfree(dev->caps.qp1_proxy);
	}

3087 3088 3089
err_close:
	mlx4_close_hca(dev);

3090 3091 3092
err_fw:
	mlx4_close_fw(dev);

3093 3094 3095 3096
err_mfunc:
	if (mlx4_is_slave(dev))
		mlx4_multi_func_cleanup(dev);

3097
err_cmd:
3098
	mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3099

3100
err_sriov:
3101
	if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
3102
		pci_disable_sriov(pdev);
3103 3104
		dev->flags &= ~MLX4_FLAG_SRIOV;
	}
3105

3106
	if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3107 3108
		atomic_dec(&pf_loading);

3109 3110
	kfree(priv->dev.dev_vfs);

3111 3112 3113
	if (!mlx4_is_slave(dev))
		mlx4_free_ownership(dev);

3114
	kfree(dev_cap);
3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246
	return err;
}

/* PCI-level bring-up of one mlx4 function, followed by mlx4_load_one().
 *
 * Enables the PCI device, validates the num_vfs/probe_vf module parameters,
 * checks the BARs, claims the PCI regions, configures DMA masks, decides
 * whether a virtual function should be probed at all, and finally starts
 * the catas machinery and loads the device.
 *
 * @pdev:         PCI device being probed
 * @pci_dev_data: driver_data flags from the matching PCI table entry
 * @priv:         pre-allocated private structure for this device
 *
 * Returns 0 on success or a negative errno.  On failure everything acquired
 * here is released again and the PCI drvdata pointer is cleared.
 */
static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
			   struct mlx4_priv *priv)
{
	int err;
	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
	int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
	/* Row (argc - 1) maps the i-th module argument to its slot: a single
	 * argument lands in slot 2, two or three arguments map one-to-one.
	 * Slot 2 is added to every port's count below.
	 */
	const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
		{2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
	unsigned total_vfs = 0;
	unsigned int i;

	pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
		return err;
	}

	/* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
	 * per port, we must limit the number of VFs to 63 (since there are
	 * 128 MACs)
	 */
	for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
	     i++) {
		int slot = param_map[num_vfs_argc - 1][i];

		nvfs[slot] = num_vfs[i];
		/* Validate the slot that was just written; slot i is a
		 * different entry when a single argument is given.
		 */
		if (nvfs[slot] < 0) {
			dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
			err = -EINVAL;
			goto err_disable_pdev;
		}
		total_vfs += nvfs[slot];
	}
	for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
	     i++) {
		int slot = param_map[probe_vfs_argc - 1][i];

		prb_vf[slot] = probe_vf[i];
		/* Compare against the VF count of the same slot */
		if (prb_vf[slot] < 0 || prb_vf[slot] > nvfs[slot]) {
			dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
			err = -EINVAL;
			goto err_disable_pdev;
		}
	}
	if (total_vfs >= MLX4_MAX_NUM_VF) {
		dev_err(&pdev->dev,
			"Requested more VF's (%d) than allowed (%d)\n",
			total_vfs, MLX4_MAX_NUM_VF - 1);
		err = -EINVAL;
		goto err_disable_pdev;
	}

	/* Per-port limit: each port carries its own VFs plus those of slot 2 */
	for (i = 0; i < MLX4_MAX_PORTS; i++) {
		if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
			dev_err(&pdev->dev,
				"Requested more VF's (%d) for port (%d) than allowed (%d)\n",
				nvfs[i] + nvfs[2], i + 1,
				MLX4_MAX_NUM_VF_P_PORT - 1);
			err = -EINVAL;
			goto err_disable_pdev;
		}
	}

	/* Check for BARs.  BAR 0 is only required when this is not a VF. */
	if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
			pci_dev_data, pci_resource_flags(pdev, 0));
		err = -ENODEV;
		goto err_disable_pdev;
	}
	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing UAR, aborting\n");
		err = -ENODEV;
		goto err_disable_pdev;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
		goto err_disable_pdev;
	}

	pci_set_master(pdev);

	/* Prefer 64-bit DMA masks, fall back to 32-bit if that fails */
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
			goto err_release_regions;
		}
	}
	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
			goto err_release_regions;
		}
	}

	/* Allow large DMA segments, up to the firmware limit of 1 GB */
	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
	/* Detect if this device is a virtual function */
	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
		/* When acting as pf, we normally skip vfs unless explicitly
		 * requested to probe them.
		 */
		if (total_vfs) {
			unsigned vfs_offset = 0;

			/* Find the slot whose VF range contains this
			 * function's extended function number.
			 */
			for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
			     vfs_offset + nvfs[i] < extended_func_num(pdev);
			     vfs_offset += nvfs[i], i++)
				;
			if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
				err = -ENODEV;
				goto err_release_regions;
			}
			/* Skip VFs beyond the probe_vf quota of that slot */
			if ((extended_func_num(pdev) - vfs_offset)
			    > prb_vf[i]) {
				dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
					 extended_func_num(pdev));
				err = -ENODEV;
				goto err_release_regions;
			}
		}
	}

	err = mlx4_catas_init(&priv->dev);
	if (err)
		goto err_release_regions;

	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
	if (err)
		goto err_catas;

	return 0;

err_catas:
	mlx4_catas_end(&priv->dev);

err_release_regions:
	pci_release_regions(pdev);

err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
	return err;
}

3269
static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
R
Roland Dreier 已提交
3270
{
3271 3272
	struct mlx4_priv *priv;
	struct mlx4_dev *dev;
3273
	int ret;
3274

3275
	printk_once(KERN_INFO "%s", mlx4_version);
R
Roland Dreier 已提交
3276

3277 3278 3279 3280 3281
	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	dev       = &priv->dev;
3282 3283 3284 3285 3286 3287 3288 3289
	dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
	if (!dev->persist) {
		kfree(priv);
		return -ENOMEM;
	}
	dev->persist->pdev = pdev;
	dev->persist->dev = dev;
	pci_set_drvdata(pdev, dev->persist);
3290
	priv->pci_dev_data = id->driver_data;
3291
	mutex_init(&dev->persist->device_state_mutex);
3292
	mutex_init(&dev->persist->interface_state_mutex);
3293

3294
	ret =  __mlx4_init_one(pdev, id->driver_data, priv);
3295 3296
	if (ret) {
		kfree(dev->persist);
3297
		kfree(priv);
3298 3299
	} else {
		pci_save_state(pdev);
3300
	}
3301

3302
	return ret;
R
Roland Dreier 已提交
3303 3304
}

3305 3306 3307 3308
static void mlx4_clean_dev(struct mlx4_dev *dev)
{
	struct mlx4_dev_persistent *persist = dev->persist;
	struct mlx4_priv *priv = mlx4_priv(dev);
3309
	unsigned long	flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);
3310 3311 3312

	memset(priv, 0, sizeof(*priv));
	priv->dev.persist = persist;
3313
	priv->dev.flags = flags;
3314 3315
}

3316
static void mlx4_unload_one(struct pci_dev *pdev)
3317
{
3318 3319
	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
	struct mlx4_dev  *dev  = persist->dev;
3320
	struct mlx4_priv *priv = mlx4_priv(dev);
3321
	int               pci_dev_data;
3322
	int p, i;
3323

3324 3325
	if (priv->removed)
		return;
3326

3327 3328 3329 3330 3331 3332 3333
	/* saving current ports type for further use */
	for (i = 0; i < dev->caps.num_ports; i++) {
		dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
		dev->persist->curr_port_poss_type[i] = dev->caps.
						       possible_type[i + 1];
	}

3334
	pci_dev_data = priv->pci_dev_data;
3335

3336 3337
	mlx4_stop_sense(dev);
	mlx4_unregister_device(dev);
3338

3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357
	for (p = 1; p <= dev->caps.num_ports; p++) {
		mlx4_cleanup_port_info(&priv->port[p]);
		mlx4_CLOSE_PORT(dev, p);
	}

	if (mlx4_is_master(dev))
		mlx4_free_resource_tracker(dev,
					   RES_TR_FREE_SLAVES_ONLY);

	mlx4_cleanup_counters_table(dev);
	mlx4_cleanup_qp_table(dev);
	mlx4_cleanup_srq_table(dev);
	mlx4_cleanup_cq_table(dev);
	mlx4_cmd_use_polling(dev);
	mlx4_cleanup_eq_table(dev);
	mlx4_cleanup_mcg_table(dev);
	mlx4_cleanup_mr_table(dev);
	mlx4_cleanup_xrcd_table(dev);
	mlx4_cleanup_pd_table(dev);
3358

3359 3360 3361
	if (mlx4_is_master(dev))
		mlx4_free_resource_tracker(dev,
					   RES_TR_FREE_STRUCTS_ONLY);
3362

3363 3364 3365 3366 3367 3368 3369 3370 3371
	iounmap(priv->kar);
	mlx4_uar_free(dev, &priv->driver_uar);
	mlx4_cleanup_uar_table(dev);
	if (!mlx4_is_slave(dev))
		mlx4_clear_steering(dev);
	mlx4_free_eq_table(dev);
	if (mlx4_is_master(dev))
		mlx4_multi_func_cleanup(dev);
	mlx4_close_hca(dev);
3372
	mlx4_close_fw(dev);
3373 3374
	if (mlx4_is_slave(dev))
		mlx4_multi_func_cleanup(dev);
3375
	mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3376

3377 3378 3379 3380 3381 3382
	if (dev->flags & MLX4_FLAG_MSI_X)
		pci_disable_msix(pdev);

	if (!mlx4_is_slave(dev))
		mlx4_free_ownership(dev);

3383
	kfree(dev->caps.qp0_qkey);
3384 3385 3386 3387 3388 3389
	kfree(dev->caps.qp0_tunnel);
	kfree(dev->caps.qp0_proxy);
	kfree(dev->caps.qp1_tunnel);
	kfree(dev->caps.qp1_proxy);
	kfree(dev->dev_vfs);

3390
	mlx4_clean_dev(dev);
3391 3392 3393 3394 3395 3396
	priv->pci_dev_data = pci_dev_data;
	priv->removed = 1;
}

static void mlx4_remove_one(struct pci_dev *pdev)
{
3397 3398
	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
	struct mlx4_dev  *dev  = persist->dev;
3399
	struct mlx4_priv *priv = mlx4_priv(dev);
3400
	int active_vfs = 0;
3401

3402 3403 3404 3405
	mutex_lock(&persist->interface_state_mutex);
	persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
	mutex_unlock(&persist->interface_state_mutex);

3406 3407 3408 3409 3410 3411 3412 3413 3414
	/* Disabling SR-IOV is not allowed while there are active vf's */
	if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
		active_vfs = mlx4_how_many_lives_vf(dev);
		if (active_vfs) {
			pr_warn("Removing PF when there are active VF's !!\n");
			pr_warn("Will not disable SR-IOV.\n");
		}
	}

3415 3416 3417 3418 3419 3420 3421
	/* device marked to be under deletion running now without the lock
	 * letting other tasks to be terminated
	 */
	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
		mlx4_unload_one(pdev);
	else
		mlx4_info(dev, "%s: interface is down\n", __func__);
3422
	mlx4_catas_end(dev);
3423 3424 3425 3426 3427
	if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
		mlx4_warn(dev, "Disabling SR-IOV\n");
		pci_disable_sriov(pdev);
	}

3428 3429
	pci_release_regions(pdev);
	pci_disable_device(pdev);
3430
	kfree(dev->persist);
3431 3432
	kfree(priv);
	pci_set_drvdata(pdev, NULL);
3433 3434
}

3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453
/* Re-apply previously saved port types.
 *
 * @types and @poss_types are 0-based arrays with one entry per port.
 * Sensing is stopped first; then, under the port mutex, the possible
 * types are written back, the port types are changed and sensing is
 * restarted.  Returns the result of mlx4_change_port_types().
 */
static int restore_current_port_types(struct mlx4_dev *dev,
				      enum mlx4_port_type *types,
				      enum mlx4_port_type *poss_types)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int port;
	int err;

	mlx4_stop_sense(dev);

	mutex_lock(&priv->port_mutex);
	/* caps.possible_type[] is indexed by port number (1-based) */
	for (port = 1; port <= dev->caps.num_ports; port++)
		dev->caps.possible_type[port] = poss_types[port - 1];
	err = mlx4_change_port_types(dev, types);
	mlx4_start_sense(dev);
	mutex_unlock(&priv->port_mutex);

	return err;
}

3454 3455
int mlx4_restart_one(struct pci_dev *pdev)
{
3456 3457
	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
	struct mlx4_dev	 *dev  = persist->dev;
3458
	struct mlx4_priv *priv = mlx4_priv(dev);
3459 3460
	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
	int pci_dev_data, err, total_vfs;
3461 3462

	pci_dev_data = priv->pci_dev_data;
3463 3464
	total_vfs = dev->persist->num_vfs;
	memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
3465 3466

	mlx4_unload_one(pdev);
3467
	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
3468 3469 3470 3471 3472 3473
	if (err) {
		mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
			 __func__, pci_name(pdev), err);
		return err;
	}

3474 3475 3476 3477 3478 3479
	err = restore_current_port_types(dev, dev->persist->curr_port_type,
					 dev->persist->curr_port_poss_type);
	if (err)
		mlx4_err(dev, "could not restore original port types (%d)\n",
			 err);

3480
	return err;
3481 3482
}

3483
static const struct pci_device_id mlx4_pci_table[] = {
3484
	/* MT25408 "Hermon" SDR */
3485
	{ PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3486
	/* MT25408 "Hermon" DDR */
3487
	{ PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3488
	/* MT25408 "Hermon" QDR */
3489
	{ PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3490
	/* MT25408 "Hermon" DDR PCIe gen2 */
3491
	{ PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3492
	/* MT25408 "Hermon" QDR PCIe gen2 */
3493
	{ PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3494
	/* MT25408 "Hermon" EN 10GigE */
3495
	{ PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3496
	/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
3497
	{ PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3498
	/* MT25458 ConnectX EN 10GBASE-T 10GigE */
3499
	{ PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3500
	/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
3501
	{ PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3502
	/* MT26468 ConnectX EN 10GigE PCIe gen2*/
3503
	{ PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3504
	/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
3505
	{ PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3506
	/* MT26478 ConnectX2 40GigE PCIe gen2 */
3507
	{ PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3508
	/* MT25400 Family [ConnectX-2 Virtual Function] */
3509
	{ PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
3510 3511 3512
	/* MT27500 Family [ConnectX-3] */
	{ PCI_VDEVICE(MELLANOX, 0x1003), 0 },
	/* MT27500 Family [ConnectX-3 Virtual Function] */
3513
	{ PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525
	{ PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
3526 3527 3528 3529 3530
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, mlx4_pci_table);

3531 3532 3533
/* PCI error handler: an error was detected on the channel.
 *
 * Puts the device into the error state and unloads it if the interface is
 * up.  A permanent failure yields PCI_ERS_RESULT_DISCONNECT; otherwise the
 * PCI device is disabled and a reset is requested.
 */
static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);

	mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n");
	mlx4_enter_error_state(persist);

	mutex_lock(&persist->interface_state_mutex);
	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
		mlx4_unload_one(pdev);
	mutex_unlock(&persist->interface_state_mutex);

	if (state != pci_channel_io_perm_failure) {
		pci_disable_device(pdev);
		return PCI_ERS_RESULT_NEED_RESET;
	}

	return PCI_ERS_RESULT_DISCONNECT;
}

static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
{
3553 3554
	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
	struct mlx4_dev	 *dev  = persist->dev;
3555 3556
	struct mlx4_priv *priv = mlx4_priv(dev);
	int               ret;
3557 3558
	int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
	int total_vfs;
3559

3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576
	mlx4_err(dev, "mlx4_pci_slot_reset was called\n");
	ret = pci_enable_device(pdev);
	if (ret) {
		mlx4_err(dev, "Can not re-enable device, ret=%d\n", ret);
		return PCI_ERS_RESULT_DISCONNECT;
	}

	pci_set_master(pdev);
	pci_restore_state(pdev);
	pci_save_state(pdev);

	total_vfs = dev->persist->num_vfs;
	memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));

	mutex_lock(&persist->interface_state_mutex);
	if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
		ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
3577
				    priv, 1);
3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591
		if (ret) {
			mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n",
				 __func__,  ret);
			goto end;
		}

		ret = restore_current_port_types(dev, dev->persist->
						 curr_port_type, dev->persist->
						 curr_port_poss_type);
		if (ret)
			mlx4_err(dev, "could not restore original port types (%d)\n", ret);
	}
end:
	mutex_unlock(&persist->interface_state_mutex);
3592 3593 3594 3595

	return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606
/* PCI shutdown callback: unload the device if its interface is up,
 * holding the interface state mutex while doing so.
 */
static void mlx4_shutdown(struct pci_dev *pdev)
{
	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
	struct mutex *state_lock = &persist->interface_state_mutex;

	mlx4_info(persist->dev, "mlx4_shutdown was called\n");

	mutex_lock(state_lock);
	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
		mlx4_unload_one(pdev);
	mutex_unlock(state_lock);
}

3607
static const struct pci_error_handlers mlx4_err_handler = {
3608 3609 3610 3611
	.error_detected = mlx4_pci_err_detected,
	.slot_reset     = mlx4_pci_slot_reset,
};

3612 3613 3614 3615
static struct pci_driver mlx4_driver = {
	.name		= DRV_NAME,
	.id_table	= mlx4_pci_table,
	.probe		= mlx4_init_one,
3616
	.shutdown	= mlx4_shutdown,
3617
	.remove		= mlx4_remove_one,
3618
	.err_handler    = &mlx4_err_handler,
3619 3620
};

3621 3622 3623
/* Sanity-check the module parameters at load time.
 *
 * Returns 0 when the parameters are usable and -1 when log_num_mac,
 * log_mtts_per_seg or mlx4_log_num_mgm_entry_size is out of range.
 * Obsolete parameters only trigger a warning, and an unsupported port
 * type combination is corrected in place.
 */
static int __init mlx4_verify_params(void)
{
	if (log_num_mac < 0 || log_num_mac > 7) {
		pr_warn("mlx4_core: bad num_mac: %d\n", log_num_mac);
		return -1;
	}

	/* Obsolete parameters: warn, but keep going */
	if (log_num_vlan)
		pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
			MLX4_LOG_NUM_VLANS);
	if (use_prio)
		pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");

	if (log_mtts_per_seg < 1 || log_mtts_per_seg > 7) {
		pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
			log_mtts_per_seg);
		return -1;
	}

	/* Check if module param for ports type has legal combination */
	if (!port_type_array[0] && port_type_array[1] == true) {
		pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
		port_type_array[0] = true;
	}

	/* Legal values are -7..0, or the MIN..MAX range when positive */
	if (mlx4_log_num_mgm_entry_size < -7 ||
	    (mlx4_log_num_mgm_entry_size > 0 &&
	     !(mlx4_log_num_mgm_entry_size >= MLX4_MIN_MGM_LOG_ENTRY_SIZE &&
	       mlx4_log_num_mgm_entry_size <= MLX4_MAX_MGM_LOG_ENTRY_SIZE))) {
		pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n",
			mlx4_log_num_mgm_entry_size,
			MLX4_MIN_MGM_LOG_ENTRY_SIZE,
			MLX4_MAX_MGM_LOG_ENTRY_SIZE);
		return -1;
	}

	return 0;
}

3661 3662 3663 3664
/* Module entry point: verify the module parameters, create the mlx4
 * workqueue and register the PCI driver.  The workqueue is destroyed
 * again if driver registration fails.
 */
static int __init mlx4_init(void)
{
	int err;

	if (mlx4_verify_params())
		return -EINVAL;

	mlx4_wq = create_singlethread_workqueue("mlx4");
	if (!mlx4_wq)
		return -ENOMEM;

	err = pci_register_driver(&mlx4_driver);
	if (err < 0) {
		destroy_workqueue(mlx4_wq);
		return err;
	}

	return 0;
}

/* Module exit point: undo mlx4_init() in reverse order, unregistering
 * the PCI driver before destroying the workqueue.
 */
static void __exit mlx4_cleanup(void)
{
	pci_unregister_driver(&mlx4_driver);
	destroy_workqueue(mlx4_wq);
}

/* Register the module load and unload entry points */
module_init(mlx4_init);
module_exit(mlx4_cleanup);