提交 ce9d3c9a 编写于 作者: L Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (87 commits)
  mlx4_core: Fix section mismatches
  IPoIB: Allow setting policy to ignore multicast groups
  IB/mthca: Mark error paths as unlikely() in post_srq_recv functions
  IB/ipath: Minor fix to ordering of freeing and zeroing of tid pages.
  IB/ipath: Remove redundant link state checks
  IB/ipath: Fix IB_EVENT_PORT_ERR event
  IB/ipath: Better handling of unexpected GPIO interrupts
  IB/ipath: Maintain active time on all chips
  IB/ipath: Fix QHT7040 serial number check
  IB/ipath: Indicate a couple of chip bugs to userspace
  IB/ipath: iba6110 rev4 no longer needs recv header overrun workaround
  IB/ipath: Use counters in ipath_poll and cleanup interrupts in ipath_close
  IB/ipath: Remove duplicate copy of LMC
  IB/ipath: Add ability to set the LMC via the sysfs debugging interface
  IB/ipath: Optimize completion queue entry insertion and polling
  IB/ipath: Implement IB_EVENT_QP_LAST_WQE_REACHED
  IB/ipath: Generate flush CQE when QP is in error state
  IB/ipath: Remove redundant code
  IB/ipath: Future proof eeprom checksum code (contents reading)
  IB/ipath: UC RDMA WRITE with IMMEDIATE doesn't send the immediate
  ...
......@@ -99,6 +99,20 @@ Transaction IDs
request/response pairs. The upper 32 bits are reserved for use by
the kernel and will be overwritten before a MAD is sent.
P_Key Index Handling
The old ib_umad interface did not allow setting the P_Key index for
MADs that are sent and did not provide a way for obtaining the P_Key
index of received MADs. A new layout for struct ib_user_mad_hdr
with a pkey_index member has been defined; however, to preserve
binary compatibility with older applications, this new layout will
not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
before a file descriptor is used for anything else.
In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
to 6, the new layout of struct ib_user_mad_hdr will be used by
default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
Setting IsSM Capability Bit
To set the IsSM capability bit for a port, simply open the
......
......@@ -161,8 +161,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in)
if (ip_route_output_key(&rt, &fl))
return;
arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev,
rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL);
neigh_event_send(rt->u.dst.neighbour, NULL);
ip_rt_put(rt);
}
......
......@@ -2219,6 +2219,9 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
enum ib_cm_state cm_state;
enum ib_cm_lap_state lap_state;
enum cm_msg_response msg_response;
void *data;
unsigned long flags;
int ret;
......@@ -2235,48 +2238,40 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
switch(cm_id_priv->id.state) {
case IB_CM_REQ_RCVD:
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret)
goto error1;
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_REQ, service_timeout,
private_data, private_data_len);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
cm_id->state = IB_CM_MRA_REQ_SENT;
cm_state = IB_CM_MRA_REQ_SENT;
lap_state = cm_id->lap_state;
msg_response = CM_MSG_RESPONSE_REQ;
break;
case IB_CM_REP_RCVD:
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret)
goto error1;
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_REP, service_timeout,
private_data, private_data_len);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
cm_id->state = IB_CM_MRA_REP_SENT;
cm_state = IB_CM_MRA_REP_SENT;
lap_state = cm_id->lap_state;
msg_response = CM_MSG_RESPONSE_REP;
break;
case IB_CM_ESTABLISHED:
cm_state = cm_id->state;
lap_state = IB_CM_MRA_LAP_SENT;
msg_response = CM_MSG_RESPONSE_OTHER;
break;
default:
ret = -EINVAL;
goto error1;
}
if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret)
goto error1;
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_OTHER, service_timeout,
msg_response, service_timeout,
private_data, private_data_len);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
cm_id->lap_state = IB_CM_MRA_LAP_SENT;
break;
default:
ret = -EINVAL;
goto error1;
}
cm_id->state = cm_state;
cm_id->lap_state = lap_state;
cm_id_priv->service_timeout = service_timeout;
cm_set_private_data(cm_id_priv, data, private_data_len);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
......
......@@ -52,6 +52,7 @@ MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device);
......@@ -138,6 +139,7 @@ struct rdma_id_private {
u32 qkey;
u32 qp_num;
u8 srq;
u8 tos;
};
struct cma_multicast {
......@@ -1089,6 +1091,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.param.ud.private_data_len =
IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
} else {
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
conn_id = cma_new_conn_id(&listen_id->id, ib_event);
cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
ib_event->private_data, offset);
......@@ -1474,6 +1477,15 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
}
EXPORT_SYMBOL(rdma_listen);
/*
 * rdma_set_service_type - remember the type of service for a CM id.
 * @id:  communication identifier embedded in a struct rdma_id_private
 * @tos: type-of-service value; it is truncated to 8 bits when stored
 */
void rdma_set_service_type(struct rdma_cm_id *id, int tos)
{
	struct rdma_id_private *priv =
		container_of(id, struct rdma_id_private, id);

	priv->tos = (u8) tos;
}
EXPORT_SYMBOL(rdma_set_service_type);
static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
void *context)
{
......@@ -1498,23 +1510,37 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
/*
 * cma_query_ib_route - issue an SA path record query for an RDMA CM id.
 * @id_priv:    connection whose route is being resolved
 * @timeout_ms: timeout handed to ib_sa_path_rec_get()
 * @work:       context passed through to cma_query_handler()
 *
 * The query is keyed on SGID, DGID, P_Key and service id, asks for a
 * single reversible path, and additionally carries either a QoS class
 * (AF_INET source address, taken from id_priv->tos) or a traffic class
 * (any other family; the source address is read as a sockaddr_in6 and
 * the class is taken from its flowinfo).
 *
 * Returns 0 when the query was posted, or the negative value returned by
 * ib_sa_path_rec_get().
 */
static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
			      struct cma_work *work)
{
	struct rdma_addr *addr = &id_priv->id.route.addr;
	struct ib_sa_path_rec path_rec;
	ib_sa_comp_mask comp_mask;
	struct sockaddr_in6 *sin6;

	memset(&path_rec, 0, sizeof path_rec);
	ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
	ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
	path_rec.numb_path = 1;
	path_rec.reversible = 1;
	path_rec.service_id = cma_get_service_id(id_priv->id.ps, &addr->dst_addr);

	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;

	if (addr->src_addr.sa_family == AF_INET) {
		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
		comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
	} else {
		/* Every non-AF_INET source address is treated as IPv6 here. */
		sin6 = (struct sockaddr_in6 *) &addr->src_addr;
		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
	}

	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
					       id_priv->id.port_num, &path_rec,
					       comp_mask, timeout_ms,
					       GFP_KERNEL, cma_query_handler,
					       work, &id_priv->query);

	return (id_priv->query_id < 0) ? id_priv->query_id : 0;
}
......
......@@ -120,12 +120,12 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
static int alloc_name(char *name)
{
long *inuse;
unsigned long *inuse;
char buf[IB_DEVICE_NAME_MAX];
struct ib_device *device;
int i;
inuse = (long *) get_zeroed_page(GFP_KERNEL);
inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
if (!inuse)
return -ENOMEM;
......
......@@ -152,7 +152,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
#ifdef DEBUG
if (fmr->ref_count !=0) {
printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d",
printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n",
fmr, fmr->ref_count);
}
#endif
......@@ -170,7 +170,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
ret = ib_unmap_fmr(&fmr_list);
if (ret)
printk(KERN_WARNING PFX "ib_unmap_fmr returned %d", ret);
printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret);
spin_lock_irq(&pool->pool_lock);
list_splice(&unmap_list, &pool->free_list);
......@@ -235,13 +235,13 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
printk(KERN_WARNING PFX "couldn't allocate device attr struct");
printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
return ERR_PTR(-ENOMEM);
}
ret = ib_query_device(device, attr);
if (ret) {
printk(KERN_WARNING PFX "couldn't query device: %d", ret);
printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
kfree(attr);
return ERR_PTR(ret);
}
......@@ -255,7 +255,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
pool = kmalloc(sizeof *pool, GFP_KERNEL);
if (!pool) {
printk(KERN_WARNING PFX "couldn't allocate pool struct");
printk(KERN_WARNING PFX "couldn't allocate pool struct\n");
return ERR_PTR(-ENOMEM);
}
......@@ -272,7 +272,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
GFP_KERNEL);
if (!pool->cache_bucket) {
printk(KERN_WARNING PFX "Failed to allocate cache in pool");
printk(KERN_WARNING PFX "Failed to allocate cache in pool\n");
ret = -ENOMEM;
goto out_free_pool;
}
......@@ -296,7 +296,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
"ib_fmr(%s)",
device->name);
if (IS_ERR(pool->thread)) {
printk(KERN_WARNING PFX "couldn't start cleanup thread");
printk(KERN_WARNING PFX "couldn't start cleanup thread\n");
ret = PTR_ERR(pool->thread);
goto out_free_pool;
}
......@@ -314,7 +314,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
GFP_KERNEL);
if (!fmr) {
printk(KERN_WARNING PFX "failed to allocate fmr "
"struct for FMR %d", i);
"struct for FMR %d\n", i);
goto out_fail;
}
......@@ -326,7 +326,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
if (IS_ERR(fmr->fmr)) {
printk(KERN_WARNING PFX "fmr_create failed "
"for FMR %d", i);
"for FMR %d\n", i);
kfree(fmr);
goto out_fail;
}
......@@ -381,7 +381,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
}
if (i < pool->pool_size)
printk(KERN_WARNING PFX "pool still has %d regions registered",
printk(KERN_WARNING PFX "pool still has %d regions registered\n",
pool->pool_size - i);
kfree(pool->cache_bucket);
......@@ -518,7 +518,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
#ifdef DEBUG
if (fmr->ref_count < 0)
printk(KERN_WARNING PFX "FMR %p has ref count %d < 0",
printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n",
fmr, fmr->ref_count);
#endif
......
......@@ -196,7 +196,7 @@ static void queue_join(struct mcast_member *member)
unsigned long flags;
spin_lock_irqsave(&group->lock, flags);
list_add(&member->list, &group->pending_list);
list_add_tail(&member->list, &group->pending_list);
if (group->state == MCAST_IDLE) {
group->state = MCAST_BUSY;
atomic_inc(&group->refcount);
......
......@@ -123,14 +123,10 @@ static u32 tid;
.field_name = "sa_path_rec:" #field
static const struct ib_field path_rec_table[] = {
{ RESERVED,
{ PATH_REC_FIELD(service_id),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 32 },
{ RESERVED,
.offset_words = 1,
.offset_bits = 0,
.size_bits = 32 },
.size_bits = 64 },
{ PATH_REC_FIELD(dgid),
.offset_words = 2,
.offset_bits = 0,
......@@ -179,7 +175,7 @@ static const struct ib_field path_rec_table[] = {
.offset_words = 12,
.offset_bits = 16,
.size_bits = 16 },
{ RESERVED,
{ PATH_REC_FIELD(qos_class),
.offset_words = 13,
.offset_bits = 0,
.size_bits = 12 },
......@@ -531,7 +527,7 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
query->sm_ah->pkey_index,
0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
gfp_mask);
if (!query->mad_buf) {
if (IS_ERR(query->mad_buf)) {
kref_put(&query->sm_ah->ref, free_sm_ah);
return -ENOMEM;
}
......
......@@ -792,6 +792,78 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
return ret;
}
/*
 * Apply one RDMA_OPTION_ID-level option to the context's CM id.
 *
 * Only RDMA_OPTION_ID_TOS is handled; its value must be exactly one u8.
 * Returns 0 on success, -EINVAL for a wrongly sized value and -ENOSYS for
 * any other option name.
 */
static int ucma_set_option_id(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	if (optname != RDMA_OPTION_ID_TOS)
		return -ENOSYS;

	if (optlen != sizeof(u8))
		return -EINVAL;

	rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
	return 0;
}
/*
 * Route a set-option request to the handler for its option level.
 *
 * RDMA_OPTION_ID is the only level handled; every other level yields
 * -ENOSYS.  Otherwise the handler's return value is passed through.
 */
static int ucma_set_option_level(struct ucma_context *ctx, int level,
				 int optname, void *optval, size_t optlen)
{
	if (level == RDMA_OPTION_ID)
		return ucma_set_option_id(ctx, optname, optval, optlen);

	return -ENOSYS;
}
/*
 * ucma_set_option - handle RDMA_USER_CM_CMD_SET_OPTION from user space.
 * @file:    ucma file the command was written to
 * @inbuf:   user buffer holding a struct rdma_ucm_set_option
 * @in_len:  unused here
 * @out_len: unused here
 *
 * Copies the command and then the option value it points at into kernel
 * memory, and hands the value to ucma_set_option_level().
 *
 * Returns the handler's result, -EFAULT if either user copy fails,
 * -EINVAL if the requested option length is larger than kmalloc() can
 * satisfy, -ENOMEM if the temporary buffer cannot be allocated, or the
 * error from ucma_get_ctx().
 */
static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_set_option cmd;
	struct ucma_context *ctx;
	void *optval;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	/*
	 * optlen comes straight from user space; refuse sizes the allocator
	 * cannot serve instead of passing them to kmalloc().  Checked before
	 * the context lookup so no reference needs to be dropped on failure.
	 */
	if (cmd.optlen > KMALLOC_MAX_SIZE)
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	optval = kmalloc(cmd.optlen, GFP_KERNEL);
	if (!optval) {
		ret = -ENOMEM;
		goto out1;
	}

	if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
			   cmd.optlen)) {
		ret = -EFAULT;
		goto out2;
	}

	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
				    cmd.optlen);
out2:
	kfree(optval);
out1:
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
......@@ -936,7 +1008,7 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
[RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
[RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event,
[RDMA_USER_CM_CMD_GET_OPTION] = NULL,
[RDMA_USER_CM_CMD_SET_OPTION] = NULL,
[RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option,
[RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
[RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
[RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
......
......@@ -37,6 +37,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>
#include "uverbs.h"
......@@ -75,6 +76,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
{
struct ib_umem *umem;
struct page **page_list;
struct vm_area_struct **vma_list;
struct ib_umem_chunk *chunk;
unsigned long locked;
unsigned long lock_limit;
......@@ -104,6 +106,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
*/
umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;
INIT_LIST_HEAD(&umem->chunk_list);
page_list = (struct page **) __get_free_page(GFP_KERNEL);
......@@ -112,6 +117,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
return ERR_PTR(-ENOMEM);
}
/*
* if we can't alloc the vma_list, it's not so bad;
* just assume the memory is not hugetlb memory
*/
vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
if (!vma_list)
umem->hugetlb = 0;
npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem);
......@@ -131,7 +144,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
ret = get_user_pages(current, current->mm, cur_base,
min_t(int, npages,
PAGE_SIZE / sizeof (struct page *)),
1, !umem->writable, page_list, NULL);
1, !umem->writable, page_list, vma_list);
if (ret < 0)
goto out;
......@@ -152,6 +165,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
for (i = 0; i < chunk->nents; ++i) {
if (vma_list &&
!is_vm_hugetlb_page(vma_list[i + off]))
umem->hugetlb = 0;
chunk->page_list[i].page = page_list[i + off];
chunk->page_list[i].offset = 0;
chunk->page_list[i].length = PAGE_SIZE;
......@@ -186,6 +202,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
current->mm->locked_vm = locked;
up_write(&current->mm->mmap_sem);
if (vma_list)
free_page((unsigned long) vma_list);
free_page((unsigned long) page_list);
return ret < 0 ? ERR_PTR(ret) : umem;
......
......@@ -44,6 +44,7 @@
#include <linux/poll.h>
#include <linux/rwsem.h>
#include <linux/kref.h>
#include <linux/compat.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
......@@ -118,6 +119,8 @@ struct ib_umad_file {
wait_queue_head_t recv_wait;
struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
int agents_dead;
u8 use_pkey_index;
u8 already_used;
};
struct ib_umad_packet {
......@@ -147,6 +150,12 @@ static void ib_umad_release_dev(struct kref *ref)
kfree(dev);
}
/*
 * Size of the user MAD header layout in effect for this file: the layout
 * with pkey_index once use_pkey_index is set, the old layout otherwise.
 */
static int hdr_size(struct ib_umad_file *file)
{
	if (file->use_pkey_index)
		return sizeof (struct ib_user_mad_hdr);

	return sizeof (struct ib_user_mad_hdr_old);
}
/* caller must hold port->mutex at least for reading */
static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
{
......@@ -221,13 +230,13 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->length = mad_recv_wc->mad_len;
packet->recv_wc = mad_recv_wc;
packet->mad.hdr.status = 0;
packet->mad.hdr.length = sizeof (struct ib_user_mad) +
mad_recv_wc->mad_len;
packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
packet->mad.hdr.sl = mad_recv_wc->wc->sl;
packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
packet->mad.hdr.status = 0;
packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len;
packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
packet->mad.hdr.sl = mad_recv_wc->wc->sl;
packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
if (packet->mad.hdr.grh_present) {
struct ib_ah_attr ah_attr;
......@@ -253,8 +262,8 @@ static void recv_handler(struct ib_mad_agent *agent,
ib_free_recv_mad(mad_recv_wc);
}
static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
size_t count)
static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf,
struct ib_umad_packet *packet, size_t count)
{
struct ib_mad_recv_buf *recv_buf;
int left, seg_payload, offset, max_seg_payload;
......@@ -262,15 +271,15 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
/* We need enough room to copy the first (or only) MAD segment. */
recv_buf = &packet->recv_wc->recv_buf;
if ((packet->length <= sizeof (*recv_buf->mad) &&
count < sizeof (packet->mad) + packet->length) ||
count < hdr_size(file) + packet->length) ||
(packet->length > sizeof (*recv_buf->mad) &&
count < sizeof (packet->mad) + sizeof (*recv_buf->mad)))
count < hdr_size(file) + sizeof (*recv_buf->mad)))
return -EINVAL;
if (copy_to_user(buf, &packet->mad, sizeof (packet->mad)))
if (copy_to_user(buf, &packet->mad, hdr_size(file)))
return -EFAULT;
buf += sizeof (packet->mad);
buf += hdr_size(file);
seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
if (copy_to_user(buf, recv_buf->mad, seg_payload))
return -EFAULT;
......@@ -280,7 +289,7 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
* Multipacket RMPP MAD message. Copy remainder of message.
* Note that last segment may have a shorter payload.
*/
if (count < sizeof (packet->mad) + packet->length) {
if (count < hdr_size(file) + packet->length) {
/*
* The buffer is too small, return the first RMPP segment,
* which includes the RMPP message length.
......@@ -300,18 +309,23 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
return -EFAULT;
}
}
return sizeof (packet->mad) + packet->length;
return hdr_size(file) + packet->length;
}
static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet,
size_t count)
static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf,
struct ib_umad_packet *packet, size_t count)
{
ssize_t size = sizeof (packet->mad) + packet->length;
ssize_t size = hdr_size(file) + packet->length;
if (count < size)
return -EINVAL;
if (copy_to_user(buf, &packet->mad, size))
if (copy_to_user(buf, &packet->mad, hdr_size(file)))
return -EFAULT;
buf += hdr_size(file);
if (copy_to_user(buf, packet->mad.data, packet->length))
return -EFAULT;
return size;
......@@ -324,7 +338,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
struct ib_umad_packet *packet;
ssize_t ret;
if (count < sizeof (struct ib_user_mad))
if (count < hdr_size(file))
return -EINVAL;
spin_lock_irq(&file->recv_lock);
......@@ -348,9 +362,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
spin_unlock_irq(&file->recv_lock);
if (packet->recv_wc)
ret = copy_recv_mad(buf, packet, count);
ret = copy_recv_mad(file, buf, packet, count);
else
ret = copy_send_mad(buf, packet, count);
ret = copy_send_mad(file, buf, packet, count);
if (ret < 0) {
/* Requeue packet */
......@@ -442,15 +456,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
__be64 *tid;
int ret, data_len, hdr_len, copy_offset, rmpp_active;
if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
return -EINVAL;
packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
if (!packet)
return -ENOMEM;
if (copy_from_user(&packet->mad, buf,
sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) {
if (copy_from_user(&packet->mad, buf, hdr_size(file))) {
ret = -EFAULT;
goto err;
}
......@@ -461,6 +474,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err;
}
buf += hdr_size(file);
if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) {
ret = -EFAULT;
goto err;
}
down_read(&file->port->mutex);
agent = __get_agent(file, packet->mad.hdr.id);
......@@ -500,11 +520,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
IB_MGMT_RMPP_FLAG_ACTIVE;
}
data_len = count - sizeof (struct ib_user_mad) - hdr_len;
data_len = count - hdr_size(file) - hdr_len;
packet->msg = ib_create_send_mad(agent,
be32_to_cpu(packet->mad.hdr.qpn),
0, rmpp_active, hdr_len,
data_len, GFP_KERNEL);
packet->mad.hdr.pkey_index, rmpp_active,
hdr_len, data_len, GFP_KERNEL);
if (IS_ERR(packet->msg)) {
ret = PTR_ERR(packet->msg);
goto err_ah;
......@@ -517,7 +537,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
/* Copy MAD header. Any RMPP header is already in place. */
memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
buf += sizeof (struct ib_user_mad);
if (!rmpp_active) {
if (copy_from_user(packet->msg->mad + copy_offset,
......@@ -589,7 +608,8 @@ static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wa
return mask;
}
static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
int compat_method_mask)
{
struct ib_user_mad_reg_req ureq;
struct ib_mad_reg_req req;
......@@ -604,7 +624,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
goto out;
}
if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) {
if (copy_from_user(&ureq, arg, sizeof ureq)) {
ret = -EFAULT;
goto out;
}
......@@ -625,8 +645,18 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
if (ureq.mgmt_class) {
req.mgmt_class = ureq.mgmt_class;
req.mgmt_class_version = ureq.mgmt_class_version;
memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask);
memcpy(req.oui, ureq.oui, sizeof req.oui);
memcpy(req.oui, ureq.oui, sizeof req.oui);
if (compat_method_mask) {
u32 *umm = (u32 *) ureq.method_mask;
int i;
for (i = 0; i < BITS_TO_LONGS(IB_MGMT_MAX_METHODS); ++i)
req.method_mask[i] =
umm[i * 2] | ((u64) umm[i * 2 + 1] << 32);
} else
memcpy(req.method_mask, ureq.method_mask,
sizeof req.method_mask);
}
agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
......@@ -646,6 +676,16 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
goto out;
}
if (!file->already_used) {
file->already_used = 1;
if (!file->use_pkey_index) {
printk(KERN_WARNING "user_mad: process %s did not enable "
"P_Key index support.\n", current->comm);
printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt "
"has info on the new ABI.\n");
}
}
file->agent[agent_id] = agent;
ret = 0;
......@@ -654,13 +694,13 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
return ret;
}
static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
{
struct ib_mad_agent *agent = NULL;
u32 id;
int ret = 0;
if (get_user(id, (u32 __user *) arg))
if (get_user(id, arg))
return -EFAULT;
down_write(&file->port->mutex);
......@@ -682,18 +722,51 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
return ret;
}
/*
 * Switch this file to the header layout that carries a P_Key index.
 *
 * Only allowed while the file has not been marked as already used;
 * afterwards the request fails with -EINVAL.  The check and the update
 * are done with port->mutex held for writing.
 */
static long ib_umad_enable_pkey(struct ib_umad_file *file)
{
	long rc;

	down_write(&file->port->mutex);
	if (!file->already_used) {
		file->use_pkey_index = 1;
		rc = 0;
	} else {
		rc = -EINVAL;
	}
	up_write(&file->port->mutex);

	return rc;
}
/*
 * Native ioctl entry point for umad files.
 *
 * @arg is a user-space pointer for the register/unregister commands and
 * is unused by IB_USER_MAD_ENABLE_PKEY.  Unknown commands return
 * -ENOIOCTLCMD.
 */
static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
			  unsigned long arg)
{
	switch (cmd) {
	case IB_USER_MAD_REGISTER_AGENT:
		/* Final argument 0: method mask is in native layout. */
		return ib_umad_reg_agent(filp->private_data, (void __user *) arg, 0);
	case IB_USER_MAD_UNREGISTER_AGENT:
		return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
	case IB_USER_MAD_ENABLE_PKEY:
		return ib_umad_enable_pkey(filp->private_data);
	default:
		return -ENOIOCTLCMD;
	}
}
#ifdef CONFIG_COMPAT
/*
 * ioctl entry point for compat callers.
 *
 * Same commands as ib_umad_ioctl(), but @arg is turned into a user
 * pointer with compat_ptr(), and agent registration is told to decode
 * the method mask from 32-bit words (final argument 1).
 */
static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
				 unsigned long arg)
{
	switch (cmd) {
	case IB_USER_MAD_REGISTER_AGENT:
		return ib_umad_reg_agent(filp->private_data, compat_ptr(arg), 1);
	case IB_USER_MAD_UNREGISTER_AGENT:
		return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
	case IB_USER_MAD_ENABLE_PKEY:
		return ib_umad_enable_pkey(filp->private_data);
	default:
		return -ENOIOCTLCMD;
	}
}
#endif
static int ib_umad_open(struct inode *inode, struct file *filp)
{
......@@ -782,7 +855,9 @@ static const struct file_operations umad_fops = {
.write = ib_umad_write,
.poll = ib_umad_poll,
.unlocked_ioctl = ib_umad_ioctl,
.compat_ioctl = ib_umad_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ib_umad_compat_ioctl,
#endif
.open = ib_umad_open,
.release = ib_umad_close
};
......
......@@ -148,7 +148,6 @@ void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
int is_async, int *fd);
void ib_uverbs_release_event_file(struct kref *ref);
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
......
......@@ -125,6 +125,14 @@ static void ib_uverbs_release_dev(struct kref *ref)
complete(&dev->comp);
}
/*
 * kref release callback: free the ib_uverbs_event_file that embeds @ref.
 */
static void ib_uverbs_release_event_file(struct kref *ref)
{
	struct ib_uverbs_event_file *ev_file;

	ev_file = container_of(ref, struct ib_uverbs_event_file, ref);
	kfree(ev_file);
}
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
struct ib_uverbs_event_file *ev_file,
struct ib_ucq_object *uobj)
......@@ -331,14 +339,6 @@ static unsigned int ib_uverbs_event_poll(struct file *filp,
return pollflags;
}
/*
 * kref release callback: recover the ib_uverbs_event_file that embeds
 * @ref and free it.
 */
void ib_uverbs_release_event_file(struct kref *ref)
{
struct ib_uverbs_event_file *file =
container_of(ref, struct ib_uverbs_event_file, ref);
kfree(file);
}
static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
{
struct ib_uverbs_event_file *file = filp->private_data;
......
......@@ -63,37 +63,37 @@ static char *states[] = {
};
/*
 * Connection-manager tunables.  Each one is registered exactly once, with
 * mode 0644.
 */
static int ep_timeout_secs = 10;
module_param(ep_timeout_secs, int, 0644);
MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
				   "in seconds (default=10)");

static int mpa_rev = 1;
module_param(mpa_rev, int, 0644);
MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
		 "1 is spec compliant. (default=1)");

static int markers_enabled = 0;
module_param(markers_enabled, int, 0644);
MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");

static int crc_enabled = 1;
module_param(crc_enabled, int, 0644);
MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");

/* Default is 256 * 1024 bytes; the description states it in KB like snd_win. */
static int rcv_win = 256 * 1024;
module_param(rcv_win, int, 0644);
MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");

static int snd_win = 32 * 1024;
module_param(snd_win, int, 0644);
MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");

static unsigned int nocong = 0;
module_param(nocong, uint, 0644);
MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");

static unsigned int cong_flavor = 1;
module_param(cong_flavor, uint, 0644);
MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
static void process_work(struct work_struct *work);
......
......@@ -53,6 +53,7 @@ struct ehca_pd;
struct ehca_av;
#include <linux/wait.h>
#include <linux/mutex.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
......@@ -99,10 +100,10 @@ struct ehca_sport {
struct ehca_sma_attr saved_attr;
};
/*
 * Memory-region page-size capability flags.  Each flag is a single bit in
 * the top nibble of a 32-bit word, defined exactly once.
 */
#define HCA_CAP_MR_PGSIZE_4K  0x80000000
#define HCA_CAP_MR_PGSIZE_64K 0x40000000
#define HCA_CAP_MR_PGSIZE_1M  0x20000000
#define HCA_CAP_MR_PGSIZE_16M 0x10000000
struct ehca_shca {
struct ib_device ib_device;
......@@ -337,6 +338,8 @@ struct ehca_create_cq_resp {
u32 cq_number;
u32 token;
struct ipzu_queue_resp ipz_queue;
u32 fw_handle_ofs;
u32 dummy;
};
struct ehca_create_qp_resp {
......@@ -347,7 +350,8 @@ struct ehca_create_qp_resp {
u32 qkey;
/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
u32 real_qp_num;
u32 dummy; /* padding for 8 byte alignment */
u32 fw_handle_ofs;
u32 dummy;
struct ipzu_queue_resp ipz_squeue;
struct ipzu_queue_resp ipz_rqueue;
};
......
......@@ -166,7 +166,6 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
write_lock_irqsave(&ehca_cq_idr_lock, flags);
ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
} while (ret == -EAGAIN);
if (ret) {
......@@ -176,6 +175,12 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
goto create_cq_exit1;
}
if (my_cq->token > 0x1FFFFFF) {
cq = ERR_PTR(-ENOMEM);
ehca_err(device, "Invalid number of cq. device=%p", device);
goto create_cq_exit2;
}
/*
* CQs maximum depth is 4GB-64, but we need additional 20 as buffer
* for receiving errors CQEs.
......@@ -185,7 +190,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
if (h_ret != H_SUCCESS) {
ehca_err(device, "hipz_h_alloc_resource_cq() failed "
"h_ret=%lx device=%p", h_ret, device);
"h_ret=%li device=%p", h_ret, device);
cq = ERR_PTR(ehca2ib_return_code(h_ret));
goto create_cq_exit2;
}
......@@ -193,7 +198,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
if (!ipz_rc) {
ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p",
ipz_rc, device);
cq = ERR_PTR(-EINVAL);
goto create_cq_exit3;
......@@ -221,7 +226,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
if (h_ret < H_SUCCESS) {
ehca_err(device, "hipz_h_register_rpage_cq() failed "
"ehca_cq=%p cq_num=%x h_ret=%lx counter=%i "
"ehca_cq=%p cq_num=%x h_ret=%li counter=%i "
"act_pages=%i", my_cq, my_cq->cq_number,
h_ret, counter, param.act_pages);
cq = ERR_PTR(-EINVAL);
......@@ -233,7 +238,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
if ((h_ret != H_SUCCESS) || vpage) {
ehca_err(device, "Registration of pages not "
"complete ehca_cq=%p cq_num=%x "
"h_ret=%lx", my_cq, my_cq->cq_number,
"h_ret=%li", my_cq, my_cq->cq_number,
h_ret);
cq = ERR_PTR(-EAGAIN);
goto create_cq_exit4;
......@@ -241,7 +246,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
} else {
if (h_ret != H_PAGE_REGISTERED) {
ehca_err(device, "Registration of page failed "
"ehca_cq=%p cq_num=%x h_ret=%lx"
"ehca_cq=%p cq_num=%x h_ret=%li"
"counter=%i act_pages=%i",
my_cq, my_cq->cq_number,
h_ret, counter, param.act_pages);
......@@ -276,6 +281,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
resp.ipz_queue.queue_length = ipz_queue->queue_length;
resp.ipz_queue.pagesize = ipz_queue->pagesize;
resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
resp.fw_handle_ofs = (u32)
(my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, "Copy to udata failed.");
goto create_cq_exit4;
......@@ -291,7 +298,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
if (h_ret != H_SUCCESS)
ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
"cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
"cq_num=%x h_ret=%li", my_cq, my_cq->cq_number, h_ret);
create_cq_exit2:
write_lock_irqsave(&ehca_cq_idr_lock, flags);
......@@ -355,7 +362,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
cq_num);
}
if (h_ret != H_SUCCESS) {
ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx "
ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%li "
"ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
return ehca2ib_return_code(h_ret);
}
......
......@@ -82,17 +82,17 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
props->vendor_id = rblock->vendor_id >> 8;
props->vendor_part_id = rblock->vendor_part_id >> 16;
props->hw_ver = rblock->hw_ver;
props->max_qp = min_t(int, rblock->max_qp, INT_MAX);
props->max_qp_wr = min_t(int, rblock->max_wqes_wq, INT_MAX);
props->max_sge = min_t(int, rblock->max_sge, INT_MAX);
props->max_sge_rd = min_t(int, rblock->max_sge_rd, INT_MAX);
props->max_cq = min_t(int, rblock->max_cq, INT_MAX);
props->max_cqe = min_t(int, rblock->max_cqe, INT_MAX);
props->max_mr = min_t(int, rblock->max_mr, INT_MAX);
props->max_mw = min_t(int, rblock->max_mw, INT_MAX);
props->max_pd = min_t(int, rblock->max_pd, INT_MAX);
props->max_ah = min_t(int, rblock->max_ah, INT_MAX);
props->max_fmr = min_t(int, rblock->max_mr, INT_MAX);
props->max_qp = min_t(unsigned, rblock->max_qp, INT_MAX);
props->max_qp_wr = min_t(unsigned, rblock->max_wqes_wq, INT_MAX);
props->max_sge = min_t(unsigned, rblock->max_sge, INT_MAX);
props->max_sge_rd = min_t(unsigned, rblock->max_sge_rd, INT_MAX);
props->max_cq = min_t(unsigned, rblock->max_cq, INT_MAX);
props->max_cqe = min_t(unsigned, rblock->max_cqe, INT_MAX);
props->max_mr = min_t(unsigned, rblock->max_mr, INT_MAX);
props->max_mw = min_t(unsigned, rblock->max_mw, INT_MAX);
props->max_pd = min_t(unsigned, rblock->max_pd, INT_MAX);
props->max_ah = min_t(unsigned, rblock->max_ah, INT_MAX);
props->max_fmr = min_t(unsigned, rblock->max_mr, INT_MAX);
if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
props->max_srq = props->max_qp;
......@@ -104,15 +104,15 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
props->local_ca_ack_delay
= rblock->local_ca_ack_delay;
props->max_raw_ipv6_qp
= min_t(int, rblock->max_raw_ipv6_qp, INT_MAX);
= min_t(unsigned, rblock->max_raw_ipv6_qp, INT_MAX);
props->max_raw_ethy_qp
= min_t(int, rblock->max_raw_ethy_qp, INT_MAX);
= min_t(unsigned, rblock->max_raw_ethy_qp, INT_MAX);
props->max_mcast_grp
= min_t(int, rblock->max_mcast_grp, INT_MAX);
= min_t(unsigned, rblock->max_mcast_grp, INT_MAX);
props->max_mcast_qp_attach
= min_t(int, rblock->max_mcast_qp_attach, INT_MAX);
= min_t(unsigned, rblock->max_mcast_qp_attach, INT_MAX);
props->max_total_mcast_qp_attach
= min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX);
= min_t(unsigned, rblock->max_total_mcast_qp_attach, INT_MAX);
/* translate device capabilities */
props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
......@@ -352,7 +352,7 @@ int ehca_modify_port(struct ib_device *ibdev,
hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
cap, props->init_type, port_modify_mask);
if (hret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Modify port failed hret=%lx",
ehca_err(&shca->ib_device, "Modify port failed h_ret=%li",
hret);
ret = -EINVAL;
}
......
......@@ -69,9 +69,6 @@
static void queue_comp_task(struct ehca_cq *__cq);
static struct ehca_comp_pool *pool;
#ifdef CONFIG_HOTPLUG_CPU
static struct notifier_block comp_pool_callback_nb;
#endif
static inline void comp_event_callback(struct ehca_cq *cq)
{
......@@ -294,8 +291,8 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
case 0x11: /* unaffiliated access error */
ehca_err(&shca->ib_device, "Unaffiliated access error.");
break;
case 0x12: /* path migrating error */
ehca_err(&shca->ib_device, "Path migration error.");
case 0x12: /* path migrating */
ehca_err(&shca->ib_device, "Path migrating.");
break;
case 0x13: /* interface trace stopped */
ehca_err(&shca->ib_device, "Interface trace stopped.");
......@@ -760,9 +757,7 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
kthread_stop(task);
}
#ifdef CONFIG_HOTPLUG_CPU
static void take_over_work(struct ehca_comp_pool *pool,
int cpu)
static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu)
{
struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
LIST_HEAD(list);
......@@ -785,9 +780,9 @@ static void take_over_work(struct ehca_comp_pool *pool,
}
static int comp_pool_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
static int __cpuinit comp_pool_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct ehca_cpu_comp_task *cct;
......@@ -833,7 +828,11 @@ static int comp_pool_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
#endif
/*
 * Notifier block for the completion pool.  It routes notifications to
 * comp_pool_callback() with priority 0.  ehca_create_comp_pool() registers
 * it with register_hotcpu_notifier() and ehca_destroy_comp_pool() removes it
 * again with unregister_hotcpu_notifier().
 */
static struct notifier_block comp_pool_callback_nb __cpuinitdata = {
.notifier_call = comp_pool_callback,
.priority = 0,
};
int ehca_create_comp_pool(void)
{
......@@ -864,11 +863,7 @@ int ehca_create_comp_pool(void)
}
}
#ifdef CONFIG_HOTPLUG_CPU
comp_pool_callback_nb.notifier_call = comp_pool_callback;
comp_pool_callback_nb.priority = 0;
register_cpu_notifier(&comp_pool_callback_nb);
#endif
register_hotcpu_notifier(&comp_pool_callback_nb);
printk(KERN_INFO "eHCA scaling code enabled\n");
......@@ -882,9 +877,7 @@ void ehca_destroy_comp_pool(void)
if (!ehca_scaling_code)
return;
#ifdef CONFIG_HOTPLUG_CPU
unregister_cpu_notifier(&comp_pool_callback_nb);
#endif
unregister_hotcpu_notifier(&comp_pool_callback_nb);
for (i = 0; i < NR_CPUS; i++) {
if (cpu_online(i))
......
......@@ -49,10 +49,12 @@
#include "ehca_tools.h"
#include "hcp_if.h"
#define HCAD_VERSION "0024"
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
MODULE_VERSION("SVNEHCA_0023");
MODULE_VERSION(HCAD_VERSION);
int ehca_open_aqp1 = 0;
int ehca_debug_level = 0;
......@@ -65,16 +67,16 @@ int ehca_static_rate = -1;
int ehca_scaling_code = 0;
int ehca_mr_largepage = 0;
module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
module_param_named(debug_level, ehca_debug_level, int, 0);
module_param_named(hw_level, ehca_hw_level, int, 0);
module_param_named(nr_ports, ehca_nr_ports, int, 0);
module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0);
module_param_named(port_act_time, ehca_port_act_time, int, 0);
module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0);
module_param_named(static_rate, ehca_static_rate, int, 0);
module_param_named(scaling_code, ehca_scaling_code, int, 0);
module_param_named(mr_largepage, ehca_mr_largepage, int, 0);
module_param_named(open_aqp1, ehca_open_aqp1, int, S_IRUGO);
module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
module_param_named(hw_level, ehca_hw_level, int, S_IRUGO);
module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO);
module_param_named(use_hp_mr, ehca_use_hp_mr, int, S_IRUGO);
module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO);
module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, S_IRUGO);
module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
module_param_named(scaling_code, ehca_scaling_code, int, S_IRUGO);
module_param_named(mr_largepage, ehca_mr_largepage, int, S_IRUGO);
MODULE_PARM_DESC(open_aqp1,
"AQP1 on startup (0: no (default), 1: yes)");
......@@ -273,7 +275,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
if (h_ret != H_SUCCESS) {
ehca_gen_err("Cannot query device properties. h_ret=%lx",
ehca_gen_err("Cannot query device properties. h_ret=%li",
h_ret);
ret = -EPERM;
goto sense_attributes1;
......@@ -332,7 +334,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
port = (struct hipz_query_port *)rblock;
h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
if (h_ret != H_SUCCESS) {
ehca_gen_err("Cannot query port properties. h_ret=%lx",
ehca_gen_err("Cannot query port properties. h_ret=%li",
h_ret);
ret = -EPERM;
goto sense_attributes1;
......@@ -380,7 +382,7 @@ int ehca_init_device(struct ehca_shca *shca)
strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
shca->ib_device.owner = THIS_MODULE;
shca->ib_device.uverbs_abi_ver = 7;
shca->ib_device.uverbs_abi_ver = 8;
shca->ib_device.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
......@@ -526,13 +528,13 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
ret = ib_destroy_qp(sport->ibqp_aqp1);
if (ret) {
ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret);
ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
return ret;
}
ret = ib_destroy_cq(sport->ibcq_aqp1);
if (ret)
ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret);
ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret);
return ret;
}
......@@ -728,7 +730,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
if (ret) {
ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x",
ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i",
ret);
goto probe5;
}
......@@ -736,7 +738,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
ret = ib_register_device(&shca->ib_device);
if (ret) {
ehca_err(&shca->ib_device,
"ib_register_device() failed ret=%x", ret);
"ib_register_device() failed ret=%i", ret);
goto probe6;
}
......@@ -777,7 +779,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
ret = ehca_destroy_aqp1(&shca->sport[0]);
if (ret)
ehca_err(&shca->ib_device,
"Cannot destroy AQP1 for port 1. ret=%x", ret);
"Cannot destroy AQP1 for port 1. ret=%i", ret);
probe7:
ib_unregister_device(&shca->ib_device);
......@@ -826,7 +828,7 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev)
if (ret)
ehca_err(&shca->ib_device,
"Cannot destroy AQP1 for port %x "
"ret=%x", ret, i);
"ret=%i", ret, i);
}
}
......@@ -835,20 +837,20 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev)
ret = ehca_dereg_internal_maxmr(shca);
if (ret)
ehca_err(&shca->ib_device,
"Cannot destroy internal MR. ret=%x", ret);
"Cannot destroy internal MR. ret=%i", ret);
ret = ehca_dealloc_pd(&shca->pd->ib_pd);
if (ret)
ehca_err(&shca->ib_device,
"Cannot destroy internal PD. ret=%x", ret);
"Cannot destroy internal PD. ret=%i", ret);
ret = ehca_destroy_eq(shca, &shca->eq);
if (ret)
ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret);
ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret);
ret = ehca_destroy_eq(shca, &shca->neq);
if (ret)
ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%x", ret);
ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret);
ib_dealloc_device(&shca->ib_device);
......@@ -909,7 +911,7 @@ int __init ehca_module_init(void)
int ret;
printk(KERN_INFO "eHCA Infiniband Device Driver "
"(Rel.: SVNEHCA_0023)\n");
"(Version " HCAD_VERSION ")\n");
ret = ehca_create_comp_pool();
if (ret) {
......
......@@ -88,7 +88,7 @@ int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (h_ret != H_SUCCESS)
ehca_err(ibqp->device,
"ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
"h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
"h_ret=%li", my_qp, ibqp->qp_num, h_ret);
return ehca2ib_return_code(h_ret);
}
......@@ -125,7 +125,7 @@ int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (h_ret != H_SUCCESS)
ehca_err(ibqp->device,
"ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
"h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
"h_ret=%li", my_qp, ibqp->qp_num, h_ret);
return ehca2ib_return_code(h_ret);
}
......@@ -51,6 +51,7 @@
/*
 * NUM_CHUNKS() - number of chunks of size chunk_size needed to cover length,
 * i.e. length / chunk_size rounded up.
 *
 * Every use of a macro argument is parenthesized so that expressions with
 * low-precedence operators (e.g. "1 << shift") expand correctly.
 * Note that chunk_size is evaluated twice; do not pass expressions with
 * side effects.
 */
#define NUM_CHUNKS(length, chunk_size) \
	(((length) + (chunk_size) - 1) / (chunk_size))
/* max number of rpages (per hcall register_rpages) */
#define MAX_RPAGES 512
......@@ -64,6 +65,11 @@ enum ehca_mr_pgsize {
EHCA_MR_PGSIZE16M = 0x1000000L
};
/*
 * Page shifts (log2 of the page size in bytes) for the hardware page sizes
 * 4K, 64K, 1M and 16M: 1 << 12 = 4K ... 1 << 24 = 16M.
 * ehca_reg_user_mr() uses the 4K and 16M values as lower and upper bounds
 * when clamping the computed page_shift.
 */
#define EHCA_MR_PGSHIFT4K 12
#define EHCA_MR_PGSHIFT64K 16
#define EHCA_MR_PGSHIFT1M 20
#define EHCA_MR_PGSHIFT16M 24
static u32 ehca_encode_hwpage_size(u32 pgsize)
{
u32 idx = 0;
......@@ -159,7 +165,7 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
get_dma_mr_exit0:
if (IS_ERR(ib_mr))
ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ",
ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
PTR_ERR(ib_mr), pd, mr_access_flags);
return ib_mr;
} /* end ehca_get_dma_mr() */
......@@ -271,7 +277,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
ehca_mr_delete(e_mr);
reg_phys_mr_exit0:
if (IS_ERR(ib_mr))
ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p "
ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
"num_phys_buf=%x mr_access_flags=%x iova_start=%p",
PTR_ERR(ib_mr), pd, phys_buf_array,
num_phys_buf, mr_access_flags, iova_start);
......@@ -347,17 +353,16 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
/* select proper hw_pgsize */
if (ehca_mr_largepage &&
(shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) {
if (length <= EHCA_MR_PGSIZE4K
&& PAGE_SIZE == EHCA_MR_PGSIZE4K)
hwpage_size = EHCA_MR_PGSIZE4K;
else if (length <= EHCA_MR_PGSIZE64K)
hwpage_size = EHCA_MR_PGSIZE64K;
else if (length <= EHCA_MR_PGSIZE1M)
hwpage_size = EHCA_MR_PGSIZE1M;
else
hwpage_size = EHCA_MR_PGSIZE16M;
int page_shift = PAGE_SHIFT;
if (e_mr->umem->hugetlb) {
/* determine page_shift, clamp between 4K and 16M */
page_shift = (fls64(length - 1) + 3) & ~3;
page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
EHCA_MR_PGSHIFT16M);
}
hwpage_size = 1UL << page_shift;
} else
hwpage_size = EHCA_MR_PGSIZE4K;
hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 only supports 4k */
ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size);
reg_user_mr_fallback:
......@@ -403,8 +408,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
ehca_mr_delete(e_mr);
reg_user_mr_exit0:
if (IS_ERR(ib_mr))
ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x"
" udata=%p",
ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
PTR_ERR(ib_mr), pd, mr_access_flags, udata);
return ib_mr;
} /* end ehca_reg_user_mr() */
......@@ -565,7 +569,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
rereg_phys_mr_exit0:
if (ret)
ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p "
ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
"phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
"iova_start=%p",
ret, mr, mr_rereg_mask, pd, phys_buf_array,
......@@ -607,7 +611,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
if (h_ret != H_SUCCESS) {
ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p "
ehca_err(mr->device, "hipz_mr_query failed, h_ret=%li mr=%p "
"hca_hndl=%lx mr_hndl=%lx lkey=%x",
h_ret, mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle, mr->lkey);
......@@ -625,7 +629,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
query_mr_exit0:
if (ret)
ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p",
ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
ret, mr, mr_attr);
return ret;
} /* end ehca_query_mr() */
......@@ -667,7 +671,7 @@ int ehca_dereg_mr(struct ib_mr *mr)
/* TODO: BUSY: MR still has bound window(s) */
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
if (h_ret != H_SUCCESS) {
ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p "
ehca_err(mr->device, "hipz_free_mr failed, h_ret=%li shca=%p "
"e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle, mr->lkey);
......@@ -683,7 +687,7 @@ int ehca_dereg_mr(struct ib_mr *mr)
dereg_mr_exit0:
if (ret)
ehca_err(mr->device, "ret=%x mr=%p", ret, mr);
ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
return ret;
} /* end ehca_dereg_mr() */
......@@ -708,7 +712,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
e_pd->fw_pd, &hipzout);
if (h_ret != H_SUCCESS) {
ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx "
ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%li "
"shca=%p hca_hndl=%lx mw=%p",
h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
......@@ -723,7 +727,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
ehca_mw_delete(e_mw);
alloc_mw_exit0:
if (IS_ERR(ib_mw))
ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd);
ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
return ib_mw;
} /* end ehca_alloc_mw() */
......@@ -750,7 +754,7 @@ int ehca_dealloc_mw(struct ib_mw *mw)
h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
if (h_ret != H_SUCCESS) {
ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p "
ehca_err(mw->device, "hipz_free_mw failed, h_ret=%li shca=%p "
"mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
e_mw->ipz_mw_handle.handle);
......@@ -846,10 +850,6 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
alloc_fmr_exit1:
ehca_mr_delete(e_fmr);
alloc_fmr_exit0:
if (IS_ERR(ib_fmr))
ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x "
"fmr_attr=%p", PTR_ERR(ib_fmr), pd,
mr_access_flags, fmr_attr);
return ib_fmr;
} /* end ehca_alloc_fmr() */
......@@ -916,7 +916,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
map_phys_fmr_exit0:
if (ret)
ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x "
ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
"iova=%lx", ret, fmr, page_list, list_len, iova);
return ret;
} /* end ehca_map_phys_fmr() */
......@@ -979,7 +979,7 @@ int ehca_unmap_fmr(struct list_head *fmr_list)
unmap_fmr_exit0:
if (ret)
ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
ret, fmr_list, num_fmr, unmap_fmr_cnt);
return ret;
} /* end ehca_unmap_fmr() */
......@@ -1003,7 +1003,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr)
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
if (h_ret != H_SUCCESS) {
ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p "
ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%li e_fmr=%p "
"hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
h_ret, e_fmr, shca->ipz_hca_handle.handle,
e_fmr->ipz_mr_handle.handle, fmr->lkey);
......@@ -1016,7 +1016,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr)
free_fmr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x fmr=%p", ret, fmr);
ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
return ret;
} /* end ehca_dealloc_fmr() */
......@@ -1046,7 +1046,7 @@ int ehca_reg_mr(struct ehca_shca *shca,
(u64)iova_start, size, hipz_acl,
e_pd->fw_pd, &hipzout);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx "
ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%li "
"hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
ret = ehca2ib_return_code(h_ret);
goto ehca_reg_mr_exit0;
......@@ -1072,9 +1072,9 @@ int ehca_reg_mr(struct ehca_shca *shca,
ehca_reg_mr_exit1:
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p "
ehca_err(&shca->ib_device, "h_ret=%li shca=%p e_mr=%p "
"iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
"pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%x",
"pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%i",
h_ret, shca, e_mr, iova_start, size, acl, e_pd,
hipzout.lkey, pginfo, pginfo->num_kpages,
pginfo->num_hwpages, ret);
......@@ -1083,7 +1083,7 @@ int ehca_reg_mr(struct ehca_shca *shca,
}
ehca_reg_mr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
"iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
"num_kpages=%lx num_hwpages=%lx",
ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
......@@ -1127,7 +1127,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
ret = ehca_set_pagebuf(pginfo, rnum, kpage);
if (ret) {
ehca_err(&shca->ib_device, "ehca_set_pagebuf "
"bad rc, ret=%x rnum=%x kpage=%p",
"bad rc, ret=%i rnum=%x kpage=%p",
ret, rnum, kpage);
goto ehca_reg_mr_rpages_exit1;
}
......@@ -1155,7 +1155,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
*/
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "last "
"hipz_reg_rpage_mr failed, h_ret=%lx "
"hipz_reg_rpage_mr failed, h_ret=%li "
"e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx"
" lkey=%x", h_ret, e_mr, i,
shca->ipz_hca_handle.handle,
......@@ -1167,7 +1167,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
ret = 0;
} else if (h_ret != H_PAGE_REGISTERED) {
ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
"h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx "
"h_ret=%li e_mr=%p i=%x lkey=%x hca_hndl=%lx "
"mr_hndl=%lx", h_ret, e_mr, i,
e_mr->ib.ib_mr.lkey,
shca->ipz_hca_handle.handle,
......@@ -1183,7 +1183,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
ehca_free_fw_ctrlblock(kpage);
ehca_reg_mr_rpages_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p "
ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
"num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr,
pginfo, pginfo->num_kpages, pginfo->num_hwpages);
return ret;
......@@ -1244,7 +1244,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
* (MW bound or MR is shared)
*/
ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
"(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr);
"(Rereg1), h_ret=%li e_mr=%p", h_ret, e_mr);
*pginfo = pginfo_save;
ret = -EAGAIN;
} else if ((u64 *)hipzout.vaddr != iova_start) {
......@@ -1273,7 +1273,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
ehca_free_fw_ctrlblock(kpage);
ehca_rereg_mr_rereg1_exit0:
if ( ret && (ret != -EAGAIN) )
ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x "
ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
"pginfo=%p num_kpages=%lx num_hwpages=%lx",
ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
pginfo->num_hwpages);
......@@ -1334,7 +1334,7 @@ int ehca_rereg_mr(struct ehca_shca *shca,
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_free_mr failed, "
"h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx "
"h_ret=%li e_mr=%p hca_hndl=%lx mr_hndl=%lx "
"mr->lkey=%x",
h_ret, e_mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle,
......@@ -1366,7 +1366,7 @@ int ehca_rereg_mr(struct ehca_shca *shca,
ehca_rereg_mr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
"iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
"num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
"rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
......@@ -1410,7 +1410,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
* FMRs are not shared and no MW bound to FMRs
*/
ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
"(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
"(Rereg1), h_ret=%li e_fmr=%p hca_hndl=%lx "
"mr_hndl=%lx lkey=%x lkey_out=%x",
h_ret, e_fmr, shca->ipz_hca_handle.handle,
e_fmr->ipz_mr_handle.handle,
......@@ -1422,7 +1422,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_free_mr failed, "
"h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
"h_ret=%li e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
"lkey=%x",
h_ret, e_fmr, shca->ipz_hca_handle.handle,
e_fmr->ipz_mr_handle.handle,
......@@ -1457,7 +1457,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
ehca_unmap_one_fmr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x "
ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
"fmr_max_pages=%x",
ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
return ret;
......@@ -1486,7 +1486,7 @@ int ehca_reg_smr(struct ehca_shca *shca,
(u64)iova_start, hipz_acl, e_pd->fw_pd,
&hipzout);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li "
"shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
"e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
......@@ -1510,7 +1510,7 @@ int ehca_reg_smr(struct ehca_shca *shca,
ehca_reg_smr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p "
ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
"e_newmr=%p iova_start=%p acl=%x e_pd=%p",
ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
return ret;
......@@ -1585,7 +1585,7 @@ int ehca_reg_internal_maxmr(
ehca_mr_delete(e_mr);
ehca_reg_internal_maxmr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p",
ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
ret, shca, e_pd, e_maxmr);
return ret;
} /* end ehca_reg_internal_maxmr() */
......@@ -1612,7 +1612,7 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
(u64)iova_start, hipz_acl, e_pd->fw_pd,
&hipzout);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li "
"e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
h_ret, e_origmr, shca->ipz_hca_handle.handle,
e_origmr->ipz_mr_handle.handle,
......@@ -1653,7 +1653,7 @@ int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
if (ret) {
ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
"ret=%x e_maxmr=%p shca=%p lkey=%x",
"ret=%i e_maxmr=%p shca=%p lkey=%x",
ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
shca->maxmr = e_maxmr;
goto ehca_dereg_internal_maxmr_exit0;
......@@ -1663,7 +1663,7 @@ int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
ehca_dereg_internal_maxmr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p",
ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
ret, shca, shca->maxmr);
return ret;
} /* end ehca_dereg_internal_maxmr() */
......
......@@ -273,6 +273,7 @@ static inline void queue2resp(struct ipzu_queue_resp *resp,
resp->queue_length = queue->queue_length;
resp->pagesize = queue->pagesize;
resp->toggle_state = queue->toggle_state;
resp->offset = queue->offset;
}
/*
......@@ -309,7 +310,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
}
if (!ipz_rc) {
ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x",
ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i",
ipz_rc);
return -EBUSY;
}
......@@ -333,7 +334,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
if (cnt == (nr_q_pages - 1)) { /* last page! */
if (h_ret != expected_hret) {
ehca_err(ib_dev, "hipz_qp_register_rpage() "
"h_ret= %lx ", h_ret);
"h_ret=%li", h_ret);
ret = ehca2ib_return_code(h_ret);
goto init_qp_queue1;
}
......@@ -347,7 +348,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
} else {
if (h_ret != H_PAGE_REGISTERED) {
ehca_err(ib_dev, "hipz_qp_register_rpage() "
"h_ret= %lx ", h_ret);
"h_ret=%li", h_ret);
ret = ehca2ib_return_code(h_ret);
goto init_qp_queue1;
}
......@@ -512,7 +513,7 @@ static struct ehca_qp *internal_create_qp(
} else if (init_attr->cap.max_send_wr > 255) {
ehca_err(pd->device,
"Invalid Number of "
"ax_send_wr=%x for UD QP_TYPE=%x",
"max_send_wr=%x for UD QP_TYPE=%x",
init_attr->cap.max_send_wr, qp_type);
return ERR_PTR(-EINVAL);
}
......@@ -523,6 +524,18 @@ static struct ehca_qp *internal_create_qp(
return ERR_PTR(-EINVAL);
break;
}
} else {
int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
|| qp_type == IB_QPT_GSI) ? 250 : 252;
if (init_attr->cap.max_send_sge > max_sge
|| init_attr->cap.max_recv_sge > max_sge) {
ehca_err(pd->device, "Invalid number of SGEs requested "
"send_sge=%x recv_sge=%x max_sge=%x",
init_attr->cap.max_send_sge,
init_attr->cap.max_recv_sge, max_sge);
return ERR_PTR(-EINVAL);
}
}
if (pd->uobject && udata)
......@@ -556,7 +569,6 @@ static struct ehca_qp *internal_create_qp(
write_lock_irqsave(&ehca_qp_idr_lock, flags);
ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
} while (ret == -EAGAIN);
if (ret) {
......@@ -565,11 +577,17 @@ static struct ehca_qp *internal_create_qp(
goto create_qp_exit0;
}
if (my_qp->token > 0x1FFFFFF) {
ret = -EINVAL;
ehca_err(pd->device, "Invalid number of qp");
goto create_qp_exit1;
}
parms.servicetype = ibqptype2servicetype(qp_type);
if (parms.servicetype < 0) {
ret = -EINVAL;
ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
goto create_qp_exit0;
goto create_qp_exit1;
}
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
......@@ -598,8 +616,7 @@ static struct ehca_qp *internal_create_qp(
parms.squeue.max_sge = max_send_sge;
parms.rqueue.max_sge = max_recv_sge;
if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)
&& !(context && udata)) { /* no small QP support in userspace ATM */
if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
if (HAS_SQ(my_qp))
ehca_determine_small_queue(
&parms.squeue, max_send_sge, is_llqp);
......@@ -612,7 +629,7 @@ static struct ehca_qp *internal_create_qp(
h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
if (h_ret != H_SUCCESS) {
ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%li",
h_ret);
ret = ehca2ib_return_code(h_ret);
goto create_qp_exit1;
......@@ -666,7 +683,7 @@ static struct ehca_qp *internal_create_qp(
&parms.squeue, swqe_size);
if (ret) {
ehca_err(pd->device, "Couldn't initialize squeue "
"and pages ret=%x", ret);
"and pages ret=%i", ret);
goto create_qp_exit2;
}
}
......@@ -677,7 +694,7 @@ static struct ehca_qp *internal_create_qp(
H_SUCCESS, &parms.rqueue, rwqe_size);
if (ret) {
ehca_err(pd->device, "Couldn't initialize rqueue "
"and pages ret=%x", ret);
"and pages ret=%i", ret);
goto create_qp_exit3;
}
}
......@@ -714,8 +731,6 @@ static struct ehca_qp *internal_create_qp(
if (qp_type == IB_QPT_GSI) {
h_ret = ehca_define_sqp(shca, my_qp, init_attr);
if (h_ret != H_SUCCESS) {
ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
h_ret);
ret = ehca2ib_return_code(h_ret);
goto create_qp_exit4;
}
......@@ -725,7 +740,7 @@ static struct ehca_qp *internal_create_qp(
ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
if (ret) {
ehca_err(pd->device,
"Couldn't assign qp to send_cq ret=%x", ret);
"Couldn't assign qp to send_cq ret=%i", ret);
goto create_qp_exit4;
}
}
......@@ -741,12 +756,13 @@ static struct ehca_qp *internal_create_qp(
resp.ext_type = my_qp->ext_type;
resp.qkey = my_qp->qkey;
resp.real_qp_num = my_qp->real_qp_num;
resp.ipz_rqueue.offset = my_qp->ipz_rqueue.offset;
resp.ipz_squeue.offset = my_qp->ipz_squeue.offset;
if (HAS_SQ(my_qp))
queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
if (HAS_RQ(my_qp))
queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
resp.fw_handle_ofs = (u32)
(my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
ehca_err(pd->device, "Copy to udata failed");
......@@ -841,7 +857,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
mqpcb, my_qp->galpas.kernel);
if (hret != H_SUCCESS) {
/* trailing space keeps the message apart from the concatenated "ehca_qp=" literal */
ehca_err(pd->device, "Could not modify SRQ to INIT "
"ehca_qp=%p qp_num=%x hret=%lx",
"ehca_qp=%p qp_num=%x h_ret=%li",
my_qp, my_qp->real_qp_num, hret);
goto create_srq2;
}
......@@ -855,7 +871,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
mqpcb, my_qp->galpas.kernel);
if (hret != H_SUCCESS) {
/* trailing space keeps the message apart from the concatenated "ehca_qp=" literal */
ehca_err(pd->device, "Could not enable SRQ "
"ehca_qp=%p qp_num=%x hret=%lx",
"ehca_qp=%p qp_num=%x h_ret=%li",
my_qp, my_qp->real_qp_num, hret);
goto create_srq2;
}
......@@ -869,11 +885,13 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
mqpcb, my_qp->galpas.kernel);
if (hret != H_SUCCESS) {
/* trailing space keeps the message apart from the concatenated "ehca_qp=" literal */
ehca_err(pd->device, "Could not modify SRQ to RTR "
"ehca_qp=%p qp_num=%x hret=%lx",
"ehca_qp=%p qp_num=%x h_ret=%li",
my_qp, my_qp->real_qp_num, hret);
goto create_srq2;
}
ehca_free_fw_ctrlblock(mqpcb);
return &my_qp->ib_srq;
create_srq2:
......@@ -907,7 +925,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
&bad_send_wqe_p, NULL, 2);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
" ehca_qp=%p qp_num=%x h_ret=%lx",
" ehca_qp=%p qp_num=%x h_ret=%li",
my_qp, qp_num, h_ret);
return ehca2ib_return_code(h_ret);
}
......@@ -985,7 +1003,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
mqpcb, my_qp->galpas.kernel);
if (h_ret != H_SUCCESS) {
ehca_err(ibqp->device, "hipz_h_query_qp() failed "
"ehca_qp=%p qp_num=%x h_ret=%lx",
"ehca_qp=%p qp_num=%x h_ret=%li",
my_qp, ibqp->qp_num, h_ret);
ret = ehca2ib_return_code(h_ret);
goto modify_qp_exit1;
......@@ -1021,7 +1039,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
ibqp, &smiqp_attr, smiqp_attr_mask, 1);
if (smirc) {
ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
"ehca_modify_qp() rc=%x", smirc);
"ehca_modify_qp() rc=%i", smirc);
ret = H_PARAMETER;
goto modify_qp_exit1;
}
......@@ -1123,7 +1141,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
if (ret) {
ehca_err(ibqp->device, "prepare_sqe_rts() failed "
"ehca_qp=%p qp_num=%x ret=%x",
"ehca_qp=%p qp_num=%x ret=%i",
my_qp, ibqp->qp_num, ret);
goto modify_qp_exit2;
}
......@@ -1149,6 +1167,13 @@ static int internal_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_PKEY_INDEX) {
if (attr->pkey_index >= 16) {
ret = -EINVAL;
ehca_err(ibqp->device, "Invalid pkey_index=%x. "
"ehca_qp=%p qp_num=%x max_pkey_index=f",
attr->pkey_index, my_qp, ibqp->qp_num);
goto modify_qp_exit2;
}
mqpcb->prim_p_key_idx = attr->pkey_index;
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
}
......@@ -1257,50 +1282,78 @@ static int internal_modify_qp(struct ib_qp *ibqp,
int ehca_mult = ib_rate_to_mult(
shca->sport[my_qp->init_attr.port_num].rate);
if (attr->alt_port_num < 1
|| attr->alt_port_num > shca->num_ports) {
ret = -EINVAL;
ehca_err(ibqp->device, "Invalid alt_port=%x. "
"ehca_qp=%p qp_num=%x num_ports=%x",
attr->alt_port_num, my_qp, ibqp->qp_num,
shca->num_ports);
goto modify_qp_exit2;
}
mqpcb->alt_phys_port = attr->alt_port_num;
/* Reject alternate-path P_Key indices of 16 or more before programming them. */
if (attr->alt_pkey_index >= 16) {
	ret = -EINVAL;
	/* log the rejected alternate index itself, not the primary pkey_index */
	ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. "
		 "ehca_qp=%p qp_num=%x max_pkey_index=f",
		 attr->alt_pkey_index, my_qp, ibqp->qp_num);
	goto modify_qp_exit2;
}
mqpcb->alt_p_key_idx = attr->alt_pkey_index;
mqpcb->timeout_al = attr->alt_timeout;
mqpcb->dlid_al = attr->alt_ah_attr.dlid;
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1);
mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1);
mqpcb->service_level_al = attr->alt_ah_attr.sl;
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1);
if (ah_mult < ehca_mult)
mqpcb->max_static_rate = (ah_mult > 0) ?
((ehca_mult - 1) / ah_mult) : 0;
if (ah_mult > 0 && ah_mult < ehca_mult)
mqpcb->max_static_rate_al = (ehca_mult - 1) / ah_mult;
else
mqpcb->max_static_rate_al = 0;
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1);
/* OpenIB doesn't support alternate retry counts - copy them */
mqpcb->retry_count_al = mqpcb->retry_count;
mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count;
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1)
| EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1)
| EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1)
| EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1)
| EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1)
| EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1)
| EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1)
| EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1)
| EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1);
/*
* Always supply the GRH flag, even if it's zero, to give the
* hypervisor a clear "yes" or "no" instead of a "perhaps"
*/
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
/*
* only if GRH is TRUE we might consider SOURCE_GID_IDX
* and DEST_GID otherwise phype will return H_ATTR_PARM!!!
*/
if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
mqpcb->send_grh_flag_al = 1 << 31;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
mqpcb->source_gid_idx_al =
attr->alt_ah_attr.grh.sgid_index;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1);
mqpcb->send_grh_flag_al = 1;
for (cnt = 0; cnt < 16; cnt++)
mqpcb->dest_gid_al.byte[cnt] =
attr->alt_ah_attr.grh.dgid.raw[cnt];
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1);
mqpcb->source_gid_idx_al =
attr->alt_ah_attr.grh.sgid_index;
mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1);
mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1);
mqpcb->traffic_class_al =
attr->alt_ah_attr.grh.traffic_class;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1)
| EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1)
| EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1)
| EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) |
EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
}
}
......@@ -1322,7 +1375,14 @@ static int internal_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_PATH_MIG_STATE) {
mqpcb->path_migration_state = attr->path_mig_state;
if (attr->path_mig_state != IB_MIG_REARM
&& attr->path_mig_state != IB_MIG_MIGRATED) {
ret = -EINVAL;
ehca_err(ibqp->device, "Invalid mig_state=%x",
attr->path_mig_state);
goto modify_qp_exit2;
}
mqpcb->path_migration_state = attr->path_mig_state + 1;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
}
......@@ -1348,7 +1408,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx "
ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%li "
"ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
goto modify_qp_exit2;
}
......@@ -1381,7 +1441,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
ret = ehca2ib_return_code(h_ret);
ehca_err(ibqp->device, "ENABLE in context of "
"RESET_2_INIT failed! Maybe you didn't get "
"a LID h_ret=%lx ehca_qp=%p qp_num=%x",
"a LID h_ret=%li ehca_qp=%p qp_num=%x",
h_ret, my_qp, ibqp->qp_num);
goto modify_qp_exit2;
}
......@@ -1469,7 +1529,7 @@ int ehca_query_qp(struct ib_qp *qp,
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
ehca_err(qp->device, "hipz_h_query_qp() failed "
"ehca_qp=%p qp_num=%x h_ret=%lx",
"ehca_qp=%p qp_num=%x h_ret=%li",
my_qp, qp->qp_num, h_ret);
goto query_qp_exit1;
}
......@@ -1490,7 +1550,7 @@ int ehca_query_qp(struct ib_qp *qp,
qp_attr->qkey = qpcb->qkey;
qp_attr->path_mtu = qpcb->path_mtu;
qp_attr->path_mig_state = qpcb->path_migration_state;
qp_attr->path_mig_state = qpcb->path_migration_state - 1;
qp_attr->rq_psn = qpcb->receive_psn;
qp_attr->sq_psn = qpcb->send_psn;
qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
......@@ -1644,7 +1704,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
ehca_err(ibsrq->device, "hipz_h_modify_qp() failed rc=%lx "
ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%li "
"ehca_qp=%p qp_num=%x",
h_ret, my_qp, my_qp->real_qp_num);
}
......@@ -1687,12 +1747,13 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
ehca_err(srq->device, "hipz_h_query_qp() failed "
"ehca_qp=%p qp_num=%x h_ret=%lx",
"ehca_qp=%p qp_num=%x h_ret=%li",
my_qp, my_qp->real_qp_num, h_ret);
goto query_srq_exit1;
}
srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
srq_attr->max_sge = qpcb->actual_nr_sges_in_rq_wqe;
srq_attr->srq_limit = EHCA_BMASK_GET(
MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit);
......@@ -1737,7 +1798,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
if (ret) {
ehca_err(dev, "Couldn't unassign qp from "
"send_cq ret=%x qp_num=%x cq_num=%x", ret,
"send_cq ret=%i qp_num=%x cq_num=%x", ret,
qp_num, my_qp->send_cq->cq_number);
return ret;
}
......@@ -1749,7 +1810,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
if (h_ret != H_SUCCESS) {
ehca_err(dev, "hipz_h_destroy_qp() failed rc=%lx "
ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
"ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
return ehca2ib_return_code(h_ret);
}
......
......@@ -526,7 +526,7 @@ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
if (!cqe) {
ret = -EAGAIN;
ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p "
"cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret);
"cq_num=%x ret=%i", my_cq, my_cq->cq_number, ret);
goto poll_cq_one_exit0;
}
......
......@@ -82,7 +82,7 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
if (ret != H_SUCCESS) {
ehca_err(&shca->ib_device,
"Can't define AQP1 for port %x. rc=%lx",
"Can't define AQP1 for port %x. h_ret=%li",
port, ret);
return ret;
}
......
......@@ -73,40 +73,37 @@ extern int ehca_debug_level;
if (unlikely(ehca_debug_level)) \
dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
"PU%04x EHCA_DBG:%s " format "\n", \
get_paca()->paca_index, __FUNCTION__, \
raw_smp_processor_id(), __FUNCTION__, \
## arg); \
} while (0)
#define ehca_info(ib_dev, format, arg...) \
dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
get_paca()->paca_index, __FUNCTION__, ## arg)
raw_smp_processor_id(), __FUNCTION__, ## arg)
#define ehca_warn(ib_dev, format, arg...) \
dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
get_paca()->paca_index, __FUNCTION__, ## arg)
raw_smp_processor_id(), __FUNCTION__, ## arg)
#define ehca_err(ib_dev, format, arg...) \
dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
get_paca()->paca_index, __FUNCTION__, ## arg)
raw_smp_processor_id(), __FUNCTION__, ## arg)
/* use this one only if no ib_dev available */
#define ehca_gen_dbg(format, arg...) \
do { \
if (unlikely(ehca_debug_level)) \
printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
get_paca()->paca_index, __FUNCTION__, ## arg); \
raw_smp_processor_id(), __FUNCTION__, ## arg); \
} while (0)
#define ehca_gen_warn(format, arg...) \
do { \
if (unlikely(ehca_debug_level)) \
printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
get_paca()->paca_index, __FUNCTION__, ## arg); \
} while (0)
printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
raw_smp_processor_id(), __FUNCTION__, ## arg)
#define ehca_gen_err(format, arg...) \
printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
get_paca()->paca_index, __FUNCTION__, ## arg)
raw_smp_processor_id(), __FUNCTION__, ## arg)
/**
* ehca_dmp - printk a memory block, whose length is n*8 bytes.
......
......@@ -109,7 +109,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
u64 vsize, physical;
vsize = vma->vm_end - vma->vm_start;
if (vsize != EHCA_PAGESIZE) {
if (vsize < EHCA_PAGESIZE) {
ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
return -EINVAL;
}
......@@ -118,10 +118,10 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical);
/* VM_IO | VM_RESERVED are set by remap_pfn_range() */
ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT,
vsize, vma->vm_page_prot);
ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
vma->vm_page_prot);
if (unlikely(ret)) {
ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
ehca_gen_err("remap_pfn_range() failed ret=%i", ret);
return -ENOMEM;
}
......@@ -146,7 +146,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
page = virt_to_page(virt_addr);
ret = vm_insert_page(vma, start, page);
if (unlikely(ret)) {
ehca_gen_err("vm_insert_page() failed rc=%x", ret);
ehca_gen_err("vm_insert_page() failed rc=%i", ret);
return ret;
}
start += PAGE_SIZE;
......@@ -164,23 +164,23 @@ static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
int ret;
switch (rsrc_type) {
case 1: /* galpa fw handle */
case 0: /* galpa fw handle */
ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
if (unlikely(ret)) {
ehca_err(cq->ib_cq.device,
"ehca_mmap_fw() failed rc=%x cq_num=%x",
"ehca_mmap_fw() failed rc=%i cq_num=%x",
ret, cq->cq_number);
return ret;
}
break;
case 2: /* cq queue_addr */
case 1: /* cq queue_addr */
ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
if (unlikely(ret)) {
ehca_err(cq->ib_cq.device,
"ehca_mmap_queue() failed rc=%x cq_num=%x",
"ehca_mmap_queue() failed rc=%i cq_num=%x",
ret, cq->cq_number);
return ret;
}
......@@ -201,38 +201,38 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
int ret;
switch (rsrc_type) {
case 1: /* galpa fw handle */
case 0: /* galpa fw handle */
ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
"remap_pfn_range() failed ret=%x qp_num=%x",
"remap_pfn_range() failed ret=%i qp_num=%x",
ret, qp->ib_qp.qp_num);
return -ENOMEM;
}
break;
case 2: /* qp rqueue_addr */
case 1: /* qp rqueue_addr */
ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue",
qp->ib_qp.qp_num);
ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
&qp->mm_count_rqueue);
if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
"ehca_mmap_queue(rq) failed rc=%x qp_num=%x",
"ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
ret, qp->ib_qp.qp_num);
return ret;
}
break;
case 3: /* qp squeue_addr */
case 2: /* qp squeue_addr */
ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue",
qp->ib_qp.qp_num);
ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
&qp->mm_count_squeue);
if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
"ehca_mmap_queue(sq) failed rc=%x qp_num=%x",
"ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
ret, qp->ib_qp.qp_num);
return ret;
}
......@@ -249,10 +249,10 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
u32 idr_handle = fileoffset >> 32;
u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */
u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
u64 fileoffset = vma->vm_pgoff;
u32 idr_handle = fileoffset & 0x1FFFFFF;
u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... */
u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
u32 cur_pid = current->tgid;
u32 ret;
struct ehca_cq *cq;
......@@ -261,7 +261,7 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct ib_uobject *uobject;
switch (q_type) {
case 1: /* CQ */
case 0: /* CQ */
read_lock(&ehca_cq_idr_lock);
cq = idr_find(&ehca_cq_idr, idr_handle);
read_unlock(&ehca_cq_idr_lock);
......@@ -283,13 +283,13 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
ret = ehca_mmap_cq(vma, cq, rsrc_type);
if (unlikely(ret)) {
ehca_err(cq->ib_cq.device,
"ehca_mmap_cq() failed rc=%x cq_num=%x",
"ehca_mmap_cq() failed rc=%i cq_num=%x",
ret, cq->cq_number);
return ret;
}
break;
case 2: /* QP */
case 1: /* QP */
read_lock(&ehca_qp_idr_lock);
qp = idr_find(&ehca_qp_idr, idr_handle);
read_unlock(&ehca_qp_idr_lock);
......@@ -313,7 +313,7 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
ret = ehca_mmap_qp(vma, qp, rsrc_type);
if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
"ehca_mmap_qp() failed rc=%x qp_num=%x",
"ehca_mmap_qp() failed rc=%i qp_num=%x",
ret, qp->ib_qp.qp_num);
return ret;
}
......
......@@ -84,6 +84,10 @@
#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48)
#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49)
#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx"
#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx"
#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
static DEFINE_SPINLOCK(hcall_lock);
static u32 get_longbusy_msecs(int longbusy_rc)
......@@ -116,16 +120,28 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
unsigned long arg7)
{
long ret;
int i, sleep_msecs;
int i, sleep_msecs, do_lock;
unsigned long flags;
ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
"arg5=%lx arg6=%lx arg7=%lx",
ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
/* lock H_FREE_RESOURCE(MR) against itself and H_ALLOC_RESOURCE(MR) */
if ((opcode == H_FREE_RESOURCE) && (arg7 == 5)) {
arg7 = 0; /* better not upset firmware */
do_lock = 1;
}
for (i = 0; i < 5; i++) {
if (do_lock)
spin_lock_irqsave(&hcall_lock, flags);
ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
arg5, arg6, arg7);
if (do_lock)
spin_unlock_irqrestore(&hcall_lock, flags);
if (H_IS_LONG_BUSY(ret)) {
sleep_msecs = get_longbusy_msecs(ret);
msleep_interruptible(sleep_msecs);
......@@ -133,16 +149,13 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
}
if (ret < H_SUCCESS)
ehca_gen_err("opcode=%lx ret=%lx"
" arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
" arg5=%lx arg6=%lx arg7=%lx ",
opcode, ret,
arg1, arg2, arg3, arg4, arg5,
arg6, arg7);
ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret);
return ret;
ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT,
opcode, ret, arg1, arg2, arg3,
arg4, arg5, arg6, arg7);
else
ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
return ret;
}
return H_BUSY;
......@@ -161,25 +174,24 @@ static long ehca_plpar_hcall9(unsigned long opcode,
unsigned long arg9)
{
long ret;
int i, sleep_msecs, lock_is_set = 0;
int i, sleep_msecs, do_lock;
unsigned long flags = 0;
ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
"arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7,
arg8, arg9);
ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9);
/* lock H_ALLOC_RESOURCE(MR) against itself and H_FREE_RESOURCE(MR) */
do_lock = ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5));
for (i = 0; i < 5; i++) {
if ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)) {
if (do_lock)
spin_lock_irqsave(&hcall_lock, flags);
lock_is_set = 1;
}
ret = plpar_hcall9(opcode, outs,
arg1, arg2, arg3, arg4, arg5,
arg6, arg7, arg8, arg9);
if (lock_is_set)
if (do_lock)
spin_unlock_irqrestore(&hcall_lock, flags);
if (H_IS_LONG_BUSY(ret)) {
......@@ -188,26 +200,19 @@ static long ehca_plpar_hcall9(unsigned long opcode,
continue;
}
if (ret < H_SUCCESS)
ehca_gen_err("opcode=%lx ret=%lx"
" arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
" arg5=%lx arg6=%lx arg7=%lx arg8=%lx"
" arg9=%lx"
" out1=%lx out2=%lx out3=%lx out4=%lx"
" out5=%lx out6=%lx out7=%lx out8=%lx"
" out9=%lx",
opcode, ret,
arg1, arg2, arg3, arg4, arg5,
arg6, arg7, arg8, arg9,
outs[0], outs[1], outs[2], outs[3],
if (ret < H_SUCCESS) {
ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT,
opcode, arg1, arg2, arg3, arg4, arg5,
arg6, arg7, arg8, arg9);
ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
ret, outs[0], outs[1], outs[2], outs[3],
outs[4], outs[5], outs[6], outs[7],
outs[8]);
} else
ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
ret, outs[0], outs[1], outs[2], outs[3],
outs[4], outs[5], outs[6], outs[7],
outs[8]);
ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
"out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx "
"out9=%lx",
opcode, ret, outs[0], outs[1], outs[2], outs[3],
outs[4], outs[5], outs[6], outs[7], outs[8]);
return ret;
}
......@@ -247,7 +252,7 @@ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
*eq_ist = (u32)outs[5];
if (ret == H_NOT_ENOUGH_RESOURCES)
ehca_gen_err("Not enough resource - ret=%lx ", ret);
ehca_gen_err("Not enough resource - ret=%li ", ret);
return ret;
}
......@@ -285,7 +290,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
if (ret == H_NOT_ENOUGH_RESOURCES)
ehca_gen_err("Not enough resources. ret=%lx", ret);
ehca_gen_err("Not enough resources. ret=%li", ret);
return ret;
}
......@@ -360,7 +365,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
if (ret == H_NOT_ENOUGH_RESOURCES)
ehca_gen_err("Not enough resources. ret=%lx", ret);
ehca_gen_err("Not enough resources. ret=%li", ret);
return ret;
}
......@@ -555,7 +560,7 @@ u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
0, 0, 0, 0, 0);
if (ret == H_NOT_ENOUGH_RESOURCES)
ehca_gen_err("Insufficient resources ret=%lx", ret);
ehca_gen_err("Insufficient resources ret=%li", ret);
return ret;
}
......@@ -591,7 +596,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
qp->ipz_qp_handle.handle, /* r6 */
0, 0, 0, 0, 0, 0);
if (ret == H_HARDWARE)
ehca_gen_err("HCA not operational. ret=%lx", ret);
ehca_gen_err("HCA not operational. ret=%li", ret);
ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
adapter_handle.handle, /* r4 */
......@@ -599,7 +604,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
0, 0, 0, 0, 0);
if (ret == H_RESOURCE)
ehca_gen_err("Resource still in use. ret=%lx", ret);
ehca_gen_err("Resource still in use. ret=%li", ret);
return ret;
}
......@@ -634,7 +639,7 @@ u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
*bma_qp_nr = (u32)outs[1];
if (ret == H_ALIAS_EXIST)
ehca_gen_err("AQP1 already exists. ret=%lx", ret);
ehca_gen_err("AQP1 already exists. ret=%li", ret);
return ret;
}
......@@ -656,7 +661,7 @@ u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
0, 0);
if (ret == H_NOT_ENOUGH_RESOURCES)
ehca_gen_err("Not enough resources. ret=%lx", ret);
ehca_gen_err("Not enough resources. ret=%li", ret);
return ret;
}
......@@ -695,7 +700,7 @@ u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
0, 0, 0, 0);
if (ret == H_RESOURCE)
ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret);
ehca_gen_err("H_FREE_RESOURCE failed ret=%li ", ret);
return ret;
}
......@@ -717,7 +722,7 @@ u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
0, 0, 0, 0, 0);
if (ret == H_RESOURCE)
ehca_gen_err("Resource in use. ret=%lx ", ret);
ehca_gen_err("Resource in use. ret=%li ", ret);
return ret;
}
......@@ -816,7 +821,7 @@ u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
adapter_handle.handle, /* r4 */
mr->ipz_mr_handle.handle, /* r5 */
0, 0, 0, 0, 0);
0, 0, 0, 0, 5);
}
u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
......
......@@ -158,6 +158,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
queue->small_page = page;
queue->offset = bit << (order + 9);
return 1;
out:
......
......@@ -189,6 +189,8 @@ typedef enum _ipath_ureg {
#define IPATH_RUNTIME_RCVHDR_COPY 0x8
#define IPATH_RUNTIME_MASTER 0x10
/* 0x20 and 0x40 are no longer used, but are reserved for ABI compatibility */
#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
/*
* This structure is returned by ipath_userinit() immediately after
......@@ -350,7 +352,7 @@ struct ipath_base_info {
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
#define IPATH_USER_SWMINOR 5
#define IPATH_USER_SWMINOR 6
#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
......
......@@ -76,22 +76,25 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
}
return;
}
wc->queue[head].wr_id = entry->wr_id;
wc->queue[head].status = entry->status;
wc->queue[head].opcode = entry->opcode;
wc->queue[head].vendor_err = entry->vendor_err;
wc->queue[head].byte_len = entry->byte_len;
wc->queue[head].imm_data = (__u32 __force)entry->imm_data;
wc->queue[head].qp_num = entry->qp->qp_num;
wc->queue[head].src_qp = entry->src_qp;
wc->queue[head].wc_flags = entry->wc_flags;
wc->queue[head].pkey_index = entry->pkey_index;
wc->queue[head].slid = entry->slid;
wc->queue[head].sl = entry->sl;
wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
wc->queue[head].port_num = entry->port_num;
/* Make sure queue entry is written before the head index. */
smp_wmb();
if (cq->ip) {
wc->uqueue[head].wr_id = entry->wr_id;
wc->uqueue[head].status = entry->status;
wc->uqueue[head].opcode = entry->opcode;
wc->uqueue[head].vendor_err = entry->vendor_err;
wc->uqueue[head].byte_len = entry->byte_len;
wc->uqueue[head].imm_data = (__u32 __force)entry->imm_data;
wc->uqueue[head].qp_num = entry->qp->qp_num;
wc->uqueue[head].src_qp = entry->src_qp;
wc->uqueue[head].wc_flags = entry->wc_flags;
wc->uqueue[head].pkey_index = entry->pkey_index;
wc->uqueue[head].slid = entry->slid;
wc->uqueue[head].sl = entry->sl;
wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
wc->uqueue[head].port_num = entry->port_num;
/* Make sure entry is written before the head index. */
smp_wmb();
} else
wc->kqueue[head] = *entry;
wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
......@@ -130,6 +133,12 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
int npolled;
u32 tail;
/* The kernel can only poll a kernel completion queue */
if (cq->ip) {
npolled = -EINVAL;
goto bail;
}
spin_lock_irqsave(&cq->lock, flags);
wc = cq->queue;
......@@ -137,31 +146,10 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
if (tail > (u32) cq->ibcq.cqe)
tail = (u32) cq->ibcq.cqe;
for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
struct ipath_qp *qp;
if (tail == wc->head)
break;
/* Make sure entry is read after head index is read. */
smp_rmb();
qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
wc->queue[tail].qp_num);
entry->qp = &qp->ibqp;
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
entry->wr_id = wc->queue[tail].wr_id;
entry->status = wc->queue[tail].status;
entry->opcode = wc->queue[tail].opcode;
entry->vendor_err = wc->queue[tail].vendor_err;
entry->byte_len = wc->queue[tail].byte_len;
entry->imm_data = wc->queue[tail].imm_data;
entry->src_qp = wc->queue[tail].src_qp;
entry->wc_flags = wc->queue[tail].wc_flags;
entry->pkey_index = wc->queue[tail].pkey_index;
entry->slid = wc->queue[tail].slid;
entry->sl = wc->queue[tail].sl;
entry->dlid_path_bits = wc->queue[tail].dlid_path_bits;
entry->port_num = wc->queue[tail].port_num;
/* The kernel doesn't need a RMB since it has the lock. */
*entry = wc->kqueue[tail];
if (tail >= cq->ibcq.cqe)
tail = 0;
else
......@@ -171,6 +159,7 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
spin_unlock_irqrestore(&cq->lock, flags);
bail:
return npolled;
}
......@@ -215,6 +204,7 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vec
struct ipath_cq *cq;
struct ipath_cq_wc *wc;
struct ib_cq *ret;
u32 sz;
if (entries < 1 || entries > ib_ipath_max_cqes) {
ret = ERR_PTR(-EINVAL);
......@@ -235,7 +225,12 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vec
* We need to use vmalloc() in order to support mmap and large
* numbers of entries.
*/
wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
sz = sizeof(*wc);
if (udata && udata->outlen >= sizeof(__u64))
sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
else
sz += sizeof(struct ib_wc) * (entries + 1);
wc = vmalloc_user(sz);
if (!wc) {
ret = ERR_PTR(-ENOMEM);
goto bail_cq;
......@@ -247,9 +242,8 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vec
*/
if (udata && udata->outlen >= sizeof(__u64)) {
int err;
u32 s = sizeof *wc + sizeof(struct ib_wc) * entries;
cq->ip = ipath_create_mmap_info(dev, s, context, wc);
cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
if (!cq->ip) {
ret = ERR_PTR(-ENOMEM);
goto bail_wc;
......@@ -380,6 +374,7 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
struct ipath_cq_wc *wc;
u32 head, tail, n;
int ret;
u32 sz;
if (cqe < 1 || cqe > ib_ipath_max_cqes) {
ret = -EINVAL;
......@@ -389,7 +384,12 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
sz = sizeof(*wc);
if (udata && udata->outlen >= sizeof(__u64))
sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
else
sz += sizeof(struct ib_wc) * (cqe + 1);
wc = vmalloc_user(sz);
if (!wc) {
ret = -ENOMEM;
goto bail;
......@@ -430,7 +430,10 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
goto bail;
}
for (n = 0; tail != head; n++) {
wc->queue[n] = old_wc->queue[tail];
if (cq->ip)
wc->uqueue[n] = old_wc->uqueue[tail];
else
wc->kqueue[n] = old_wc->kqueue[tail];
if (tail == (u32) cq->ibcq.cqe)
tail = 0;
else
......@@ -447,9 +450,8 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
if (cq->ip) {
struct ipath_ibdev *dev = to_idev(ibcq->device);
struct ipath_mmap_info *ip = cq->ip;
u32 s = sizeof *wc + sizeof(struct ib_wc) * cqe;
ipath_update_mmap_info(dev, ip, s, wc);
ipath_update_mmap_info(dev, ip, sz, wc);
spin_lock_irq(&dev->pending_lock);
if (list_empty(&ip->pending_mmaps))
list_add(&ip->pending_mmaps, &dev->pending_mmaps);
......
......@@ -446,19 +446,21 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
dd->ipath_unit, plen - 1, pbufn);
if (dp.pbc_wd == 0)
/* Legacy operation, use computed pbc_wd */
dp.pbc_wd = plen;
/* we have to flush after the PBC for correctness on some cpus
* or WC buffer can be written out of order */
writeq(dp.pbc_wd, piobuf);
ipath_flush_wc();
/* copy all but the trigger word, then flush, so it's written
/*
* Copy all but the trigger word, then flush, so it's written
* to chip before trigger word, then write trigger word, then
* flush again, so packet is sent. */
__iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
ipath_flush_wc();
__raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
* flush again, so packet is sent.
*/
if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
ipath_flush_wc();
__iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
ipath_flush_wc();
__raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
} else
__iowrite32_copy(piobuf + 2, tmpbuf, clen);
ipath_flush_wc();
ret = sizeof(dp);
......
......@@ -34,6 +34,7 @@
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
......@@ -280,6 +281,89 @@ void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
{
}
/*
* Perform a PIO buffer bandwidth write test, to verify proper system
* configuration. Even when all the setup calls work, occasionally
* BIOS or other issues can prevent write combining from working, or
* can cause other bandwidth problems to the chip.
*
* This test simply writes the same buffer over and over again, and
* measures close to the peak bandwidth to the chip (not testing
* data bandwidth to the wire). On chips that use an address-based
* trigger to send packets to the wire, this is easy. On chips that
* use a count to trigger, we want to make sure that the packet doesn't
* go out on the wire, or trigger flow control checks.
*/
static void ipath_verify_pioperf(struct ipath_devdata *dd)
{
u32 pbnum, cnt, lcnt;
u32 __iomem *piobuf;
u32 *addr;
u64 msecs, emsecs;
piobuf = ipath_getpiobuf(dd, &pbnum);
if (!piobuf) {
dev_info(&dd->pcidev->dev,
"No PIObufs for checking perf, skipping\n");
return;
}
/*
* Enough to give us a reasonable test, less than piobuf size, and
* likely multiple of store buffer length.
*/
cnt = 1024;
addr = vmalloc(cnt);
if (!addr) {
dev_info(&dd->pcidev->dev,
"Couldn't get memory for checking PIO perf,"
" skipping\n");
goto done;
}
preempt_disable(); /* we want reasonably accurate elapsed time */
msecs = 1 + jiffies_to_msecs(jiffies);
for (lcnt = 0; lcnt < 10000U; lcnt++) {
/* wait until we cross msec boundary */
if (jiffies_to_msecs(jiffies) >= msecs)
break;
udelay(1);
}
writeq(0, piobuf); /* length 0, no dwords actually sent */
ipath_flush_wc();
/*
* this is only roughly accurate, since even with preempt we
* still take interrupts that could take a while. Running for
* >= 5 msec seems to get us "close enough" to accurate values
*/
msecs = jiffies_to_msecs(jiffies);
for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
__iowrite32_copy(piobuf + 64, addr, cnt >> 2);
emsecs = jiffies_to_msecs(jiffies) - msecs;
}
/* 1 GiB/sec, slightly over IB SDR line rate */
if (lcnt < (emsecs * 1024U))
ipath_dev_err(dd,
"Performance problem: bandwidth to PIO buffers is "
"only %u MiB/sec\n",
lcnt / (u32) emsecs);
else
ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
lcnt / (u32) emsecs);
preempt_enable();
vfree(addr);
done:
/* disarm piobuf, so it's available again */
ipath_disarm_piobufs(dd, pbnum, 1);
}
static int __devinit ipath_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
......@@ -298,8 +382,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
read_bars(dd, pdev, &bar0, &bar1);
ret = pci_enable_device(pdev);
if (ret) {
/* This can happen iff:
......@@ -445,9 +527,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
goto bail_regions;
}
dd->ipath_deviceid = ent->device; /* save for later use */
dd->ipath_vendorid = ent->vendor;
dd->ipath_pcirev = pdev->revision;
#if defined(__powerpc__)
......@@ -515,6 +594,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
ret = 0;
}
ipath_verify_pioperf(dd);
ipath_device_create_group(&pdev->dev, dd);
ipathfs_add_device(dd);
ipath_user_add(dd);
......@@ -2005,6 +2086,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
INFINIPATH_IBCC_LINKINITCMD_SHIFT);
ipath_cancel_sends(dd, 0);
signal_ib_event(dd, IB_EVENT_PORT_ERR);
/* disable IBC */
dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
......
......@@ -596,7 +596,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
goto bail;
}
len = offsetof(struct ipath_flash, if_future);
/*
* read full flash, not just currently used part, since it may have
* been written with a newer definition
* */
len = sizeof(struct ipath_flash);
buf = vmalloc(len);
if (!buf) {
ipath_dev_err(dd, "Couldn't allocate memory to read %u "
......@@ -737,8 +741,10 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd)
/*
* The quick-check above determined that there is something worthy
* of logging, so get current contents and take a more detailed look.
* read full flash, not just currently used part, since it may have
* been written with a newer definition
*/
len = offsetof(struct ipath_flash, if_future);
len = sizeof(struct ipath_flash);
buf = vmalloc(len);
ret = 1;
if (!buf) {
......
......@@ -538,6 +538,9 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
continue;
cnt++;
if (dd->ipath_pageshadow[porttid + tid]) {
struct page *p;
p = dd->ipath_pageshadow[porttid + tid];
dd->ipath_pageshadow[porttid + tid] = NULL;
ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
pd->port_pid, tid);
dd->ipath_f_put_tid(dd, &tidbase[tid],
......@@ -546,9 +549,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
pci_unmap_page(dd->pcidev,
dd->ipath_physshadow[porttid + tid],
PAGE_SIZE, PCI_DMA_FROMDEVICE);
ipath_release_user_pages(
&dd->ipath_pageshadow[porttid + tid], 1);
dd->ipath_pageshadow[porttid + tid] = NULL;
ipath_release_user_pages(&p, 1);
ipath_stats.sps_pageunlocks++;
} else
ipath_dbg("Unused tid %u, ignoring\n", tid);
......@@ -1341,6 +1342,19 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
return ret;
}
/*
 * Edge-triggered check for receive header queue overflow.
 *
 * @pd: the port being polled
 *
 * Returns POLLIN | POLLRDNORM when overflow polling is enabled in
 * pd->poll_type and pd->port_hdrqfull has changed since the value saved
 * in pd->port_hdrqfull_poll; in that case the saved value is brought up
 * to date so the same change is not reported twice.  Returns 0 otherwise.
 */
static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
{
	/* caller did not ask to be told about overflows */
	if (!(pd->poll_type & IPATH_POLL_TYPE_OVERFLOW))
		return 0;

	/* no new overflow since the last poll */
	if (pd->port_hdrqfull == pd->port_hdrqfull_poll)
		return 0;

	pd->port_hdrqfull_poll = pd->port_hdrqfull;
	return POLLIN | POLLRDNORM;
}
static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
struct file *fp,
struct poll_table_struct *pt)
......@@ -1350,22 +1364,20 @@ static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
dd = pd->port_dd;
if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
pollflag |= POLLERR;
clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
}
/* variable access in ipath_poll_hdrqfull() needs this */
rmb();
pollflag = ipath_poll_hdrqfull(pd);
if (test_bit(IPATH_PORT_WAITING_URG, &pd->int_flag)) {
if (pd->port_urgent != pd->port_urgent_poll) {
pollflag |= POLLIN | POLLRDNORM;
clear_bit(IPATH_PORT_WAITING_URG, &pd->int_flag);
pd->port_urgent_poll = pd->port_urgent;
}
if (!pollflag) {
/* this saves a spin_lock/unlock in interrupt handler... */
set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
set_bit(IPATH_PORT_WAITING_OVERFLOW,
&pd->port_flag);
/* flush waiting flag so don't miss an event... */
wmb();
poll_wait(fp, &pd->port_wait, pt);
}
......@@ -1376,31 +1388,27 @@ static unsigned int ipath_poll_next(struct ipath_portdata *pd,
struct file *fp,
struct poll_table_struct *pt)
{
u32 head, tail;
u32 head;
u32 tail;
unsigned pollflag = 0;
struct ipath_devdata *dd;
dd = pd->port_dd;
/* variable access in ipath_poll_hdrqfull() needs this */
rmb();
pollflag = ipath_poll_hdrqfull(pd);
head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr;
if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
pollflag |= POLLERR;
clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
}
if (tail != head ||
test_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag)) {
if (head != tail)
pollflag |= POLLIN | POLLRDNORM;
clear_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag);
}
if (!pollflag) {
else {
/* this saves a spin_lock/unlock in interrupt handler */
set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
set_bit(IPATH_PORT_WAITING_OVERFLOW,
&pd->port_flag);
/* flush waiting flag so we don't miss an event */
wmb();
set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT,
&dd->ipath_rcvctrl);
......@@ -1917,6 +1925,12 @@ static int ipath_do_user_init(struct file *fp,
ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
pd->port_port, head32);
pd->port_tidcursor = 0; /* start at beginning after open */
/* initialize poll variables... */
pd->port_urgent = 0;
pd->port_urgent_poll = 0;
pd->port_hdrqfull_poll = pd->port_hdrqfull;
/*
* now enable the port; the tail registers will be written to memory
* by the chip as soon as it sees the write to
......@@ -2039,9 +2053,11 @@ static int ipath_close(struct inode *in, struct file *fp)
if (dd->ipath_kregbase) {
int i;
/* atomically clear receive enable port. */
/* atomically clear receive enable port and intr avail. */
clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port,
&dd->ipath_rcvctrl);
clear_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT,
&dd->ipath_rcvctrl);
ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
/* and read back from chip to be sure that nothing
......
......@@ -130,175 +130,6 @@ static const struct file_operations atomic_counters_ops = {
.read = atomic_counters_read,
};
/*
 * Read handler that presents a ten-word node information image for the
 * device stored in the inode's i_private.
 *
 * @file:  open file; its dentry's inode carries the ipath_devdata
 * @buf:   user buffer to copy into
 * @count: maximum number of bytes to copy
 * @ppos:  file position, handled by simple_read_from_buffer()
 *
 * Returns the result of simple_read_from_buffer().
 */
static ssize_t atomic_node_info_read(struct file *file, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct ipath_devdata *dd = file->f_path.dentry->d_inode->i_private;
	u64 guid = be64_to_cpu(dd->ipath_guid);
	u32 guid_hi = (u32) (guid >> 32);
	u32 guid_lo = (u32) (guid & 0xffffffff);
	u32 nodeinfo[10];

	/*
	 * BaseVersion and ClassVersion are left to the SMA;
	 * NodeType goes in bit 8, NumPorts in bit 0.
	 */
	nodeinfo[0] = (1 << 8) | (1 << 0);

	/*
	 * The same GUID is reported three times over (words 1-2, 3-4 and
	 * 5-6): PortGUID == SystemImageGUID == NodeGUID for us.
	 */
	nodeinfo[1] = guid_hi;
	nodeinfo[2] = guid_lo;
	nodeinfo[3] = guid_hi;
	nodeinfo[4] = guid_lo;
	nodeinfo[5] = guid_hi;
	nodeinfo[6] = guid_lo;

	/* we support 4 pkeys; device id in the low bits */
	nodeinfo[7] = (4 << 16) | (dd->ipath_deviceid << 0);

	/* our chip version as 16 bits major, 16 bits minor */
	nodeinfo[8] = dd->ipath_minrev | (dd->ipath_majrev << 16);

	/* unit number in the top byte, vendor id in the low bits */
	nodeinfo[9] = (dd->ipath_unit << 24) | (dd->ipath_vendorid << 0);

	return simple_read_from_buffer(buf, count, ppos, nodeinfo,
				       sizeof nodeinfo);
}
/*
 * File operations for the per-device "node_info" file: only a read
 * handler is provided, atomic_node_info_read().
 */
static const struct file_operations atomic_node_info_ops = {
.read = atomic_node_info_read,
};
/*
 * Read handler that presents a thirteen-word port information image for
 * the device stored in the inode's i_private.
 *
 * @file:  open file; its dentry's inode carries the ipath_devdata
 * @buf:   user buffer to copy into
 * @count: maximum number of bytes to copy
 * @ppos:  file position, handled by simple_read_from_buffer()
 *
 * The image is zeroed first and only the non-zero fields are filled in.
 * Returns the result of simple_read_from_buffer().
 */
static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
				     size_t count, loff_t *ppos)
{
	u32 portinfo[13];
	u32 tmp, tmp2;
	struct ipath_devdata *dd;

	dd = file->f_path.dentry->d_inode->i_private;

	/* so we only initialize non-zero fields. */
	memset(portinfo, 0, sizeof portinfo);

	/*
	 * Notimpl yet M_Key (64)
	 * Notimpl yet GID (64)
	 */

	portinfo[4] = (dd->ipath_lid << 16);

	/*
	 * Notimpl yet SMLID.
	 * CapabilityMask is 0, we don't support any of these
	 * DiagCode is 0; we don't store any diag info for now Notimpl yet
	 * M_KeyLeasePeriod (we don't support M_Key)
	 */

	/* LocalPortNum is whichever port number they ask for */
	portinfo[7] = (dd->ipath_unit << 24)
		/* LinkWidthEnabled */
		| (2 << 16)
		/* LinkWidthSupported (really 2, but not IB valid) */
		| (3 << 8)
		/* LinkWidthActive */
		| (2 << 0);

	/*
	 * Translate the chip's link state into PortState (tmp) and
	 * PortPhysicalState (tmp2).  For INIT/ARM/ACTIVE the physical
	 * state is reported as 5.
	 */
	tmp = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
	tmp2 = 5;
	if (tmp == IPATH_IBSTATE_INIT)
		tmp = 2;
	else if (tmp == IPATH_IBSTATE_ARM)
		tmp = 3;
	else if (tmp == IPATH_IBSTATE_ACTIVE)
		tmp = 4;
	else {
		/*
		 * Any other state is reported as down.  The low four bits
		 * of the masked status must be captured for the physical
		 * state BEFORE tmp is cleared; doing it afterwards always
		 * yielded 0.
		 * NOTE(review): presumably these bits are the link
		 * training state - confirm against the chip register
		 * definition.
		 */
		tmp2 = tmp & 0xf;
		tmp = 0;	/* down */
	}

	portinfo[8] = (1 << 28)	/* LinkSpeedSupported */
		| (tmp << 24)	/* PortState */
		| (tmp2 << 20)	/* PortPhysicalState */
		| (2 << 16)

		/* LinkDownDefaultState */
		/* M_KeyProtectBits == 0 */
		/* NotImpl yet LMC == 0 (we can support all values) */
		| (1 << 4)	/* LinkSpeedActive */
		| (1 << 0);	/* LinkSpeedEnabled */

	/* encode the current IB MTU in bytes as the IB MTU enumeration */
	switch (dd->ipath_ibmtu) {
	case 4096:
		tmp = 5;
		break;
	case 2048:
		tmp = 4;
		break;
	case 1024:
		tmp = 3;
		break;
	case 512:
		tmp = 2;
		break;
	case 256:
		tmp = 1;
		break;
	default:		/* oops, something is wrong */
		ipath_dbg("Problem, ipath_ibmtu 0x%x not a valid IB MTU, "
			  "treat as 2048\n", dd->ipath_ibmtu);
		tmp = 4;
		break;
	}
	portinfo[9] = (tmp << 28)
		/* NeighborMTU */
		/* Notimpl MasterSMSL */
		| (1 << 20)

		/* VLCap */
		/* Notimpl InitType (actually, an SMA decision) */
		/* VLHighLimit is 0 (only one VL) */
		; /* VLArbitrationHighCap is 0 (only one VL) */
	/*
	 * Note: the chips support a maximum MTU of 4096, but the driver
	 * hasn't implemented this feature yet, so set the maximum
	 * to 2048.
	 */
	portinfo[10] =	/* VLArbitrationLowCap is 0 (only one VL) */
		/* InitTypeReply is SMA decision */
		(4 << 16)	/* MTUCap 2048 */
		| (7 << 13)	/* VLStallCount */
		| (0x1f << 8)	/* HOQLife */
		| (1 << 4)

		/* OperationalVLs 0 */
		/* PartitionEnforcementInbound */
		/* PartitionEnforcementOutbound not enforced */
		/* FilterRawinbound not enforced */
		; /* FilterRawOutbound not enforced */
	/* M_KeyViolations are not counted by hardware, SMA can count */
	tmp = ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
	/* P_KeyViolations are counted by hardware. */
	portinfo[11] = ((tmp & 0xffff) << 0);
	portinfo[12] =
		/* Q_KeyViolations are not counted by hardware */
		(1 << 8)

		/* GUIDCap */
		/* SubnetTimeOut handled by SMA */
		/* RespTimeValue handled by SMA */
		;
	/* LocalPhyErrors are programmed to max */
	portinfo[12] |= (0xf << 20)
		| (0xf << 16)	/* OverRunErrors are programmed to max */
		;

	return simple_read_from_buffer(buf, count, ppos, portinfo,
				       sizeof portinfo);
}
/*
 * File operations for the per-device "port_info" file: only a read
 * handler is provided, atomic_port_info_read().
 */
static const struct file_operations atomic_port_info_ops = {
.read = atomic_port_info_read,
};
static ssize_t flash_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
......@@ -427,22 +258,6 @@ static int create_device_files(struct super_block *sb,
goto bail;
}
ret = create_file("node_info", S_IFREG|S_IRUGO, dir, &tmp,
&atomic_node_info_ops, dd);
if (ret) {
printk(KERN_ERR "create_file(%s/node_info) "
"failed: %d\n", unit, ret);
goto bail;
}
ret = create_file("port_info", S_IFREG|S_IRUGO, dir, &tmp,
&atomic_port_info_ops, dd);
if (ret) {
printk(KERN_ERR "create_file(%s/port_info) "
"failed: %d\n", unit, ret);
goto bail;
}
ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
&flash_ops, dd);
if (ret) {
......@@ -508,8 +323,6 @@ static int remove_device_files(struct super_block *sb,
}
remove_file(dir, "flash");
remove_file(dir, "port_info");
remove_file(dir, "node_info");
remove_file(dir, "atomic_counters");
d_delete(dir);
ret = simple_rmdir(root->d_inode, dir);
......
......@@ -631,56 +631,35 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
{
char *n = NULL;
u8 boardrev = dd->ipath_boardrev;
int ret;
int ret = 0;
switch (boardrev) {
case 4: /* Ponderosa is one of the bringup boards */
n = "Ponderosa";
break;
case 5:
/*
* original production board; two production levels, with
* different serial number ranges. See ipath_ht_early_init() for
* case where we enable IPATH_GPIO_INTR for later serial # range.
* Original 112* serial number is no longer supported.
*/
n = "InfiniPath_QHT7040";
break;
case 6:
n = "OEM_Board_3";
break;
case 7:
/* small form factor production board */
n = "InfiniPath_QHT7140";
break;
case 8:
n = "LS/X-1";
break;
case 9: /* Comstock bringup test board */
n = "Comstock";
break;
case 10:
n = "OEM_Board_2";
break;
case 11:
n = "InfiniPath_HT-470"; /* obsoleted */
break;
case 12:
n = "OEM_Board_4";
break;
default: /* don't know, just print the number */
ipath_dev_err(dd, "Don't yet know about board "
"with ID %u\n", boardrev);
snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
boardrev);
ret = 1;
break;
}
if (n)
snprintf(name, namelen, "%s", n);
if (dd->ipath_boardrev != 6 && dd->ipath_boardrev != 7 &&
dd->ipath_boardrev != 11) {
if (ret) {
ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
ret = 1;
goto bail;
}
if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
......@@ -1554,10 +1533,25 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
* can use GPIO interrupts. They have serial #'s starting
* with 128, rather than 112.
*/
dd->ipath_flags |= IPATH_GPIO_INTR;
} else
ipath_dev_err(dd, "Unsupported InfiniPath serial "
"number %.16s!\n", dd->ipath_serial);
if (dd->ipath_serial[0] == '1' &&
dd->ipath_serial[1] == '2' &&
dd->ipath_serial[2] == '8')
dd->ipath_flags |= IPATH_GPIO_INTR;
else {
ipath_dev_err(dd, "Unsupported InfiniPath board "
"(serial number %.16s)!\n",
dd->ipath_serial);
return 1;
}
}
if (dd->ipath_minrev >= 4) {
/* Rev4+ reports extra errors via internal GPIO pins */
dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
dd->ipath_gpio_mask);
}
return 0;
}
......@@ -1592,7 +1586,10 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
struct ipath_base_info *kinfo = kbase;
kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
IPATH_RUNTIME_RCVHDR_COPY;
IPATH_RUNTIME_PIO_REGSWAPPED;
if (pd->port_dd->ipath_minrev < 4)
kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY;
return 0;
}
......
......@@ -1143,11 +1143,14 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
pa |= 2 << 29;
}
/* workaround chip bug 9437 by writing each TID twice
* and holding a spinlock around the writes, so they don't
* intermix with other TID (eager or expected) writes
* Unfortunately, this call can be done from interrupt level
* for the port 0 eager TIDs, so we have to use irqsave
/*
* Workaround chip bug 9437 by writing the scratch register
* before and after the TID, and with an io write barrier.
* We use a spinlock around the writes, so they can't intermix
* with other TID (eager or expected) writes (the chip bug
* is triggered by back to back TID writes). Unfortunately, this
* call can be done from interrupt level for the port 0 eager TIDs,
* so we have to use irqsave locks.
*/
spin_lock_irqsave(&dd->ipath_tid_lock, flags);
ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf);
......@@ -1273,6 +1276,8 @@ static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
static int ipath_pe_early_init(struct ipath_devdata *dd)
{
dd->ipath_flags |= IPATH_4BYTE_TID;
if (ipath_unordered_wc())
dd->ipath_flags |= IPATH_PIO_FLUSH_WC;
/*
* For openfabrics, we need to be able to handle an IB header of
......@@ -1343,7 +1348,8 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
dd = pd->port_dd;
done:
kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE |
IPATH_RUNTIME_FORCE_PIOAVAIL | IPATH_RUNTIME_PIO_REGSWAPPED;
return 0;
}
......
......@@ -275,6 +275,16 @@ static char *ib_linkstate(u32 linkstate)
return ret;
}
void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
{
struct ib_event event;
event.device = &dd->verbs_dev->ibdev;
event.element.port_num = 1;
event.event = ev;
ib_dispatch_event(&event);
}
static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
ipath_err_t errs, int noprint)
{
......@@ -373,6 +383,8 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
dd->ipath_ibpollcnt = 0; /* some state other than 2 or 3 */
ipath_stats.sps_iblink++;
if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
if (dd->ipath_flags & IPATH_LINKACTIVE)
signal_ib_event(dd, IB_EVENT_PORT_ERR);
dd->ipath_flags |= IPATH_LINKDOWN;
dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
| IPATH_LINKACTIVE |
......@@ -405,7 +417,10 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
*dd->ipath_statusp |=
IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
dd->ipath_f_setextled(dd, lstate, ltstate);
signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
if (dd->ipath_flags & IPATH_LINKACTIVE)
signal_ib_event(dd, IB_EVENT_PORT_ERR);
/*
* set INIT and DOWN. Down is checked by most of the other
* code, but INIT is useful to know in a few places.
......@@ -418,6 +433,8 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
| IPATH_STATUS_IB_READY);
dd->ipath_f_setextled(dd, lstate, ltstate);
} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) {
if (dd->ipath_flags & IPATH_LINKACTIVE)
signal_ib_event(dd, IB_EVENT_PORT_ERR);
dd->ipath_flags |= IPATH_LINKARMED;
dd->ipath_flags &=
~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT |
......@@ -688,17 +705,9 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
chkerrpkts = 1;
dd->ipath_lastrcvhdrqtails[i] = tl;
pd->port_hdrqfull++;
if (test_bit(IPATH_PORT_WAITING_OVERFLOW,
&pd->port_flag)) {
clear_bit(
IPATH_PORT_WAITING_OVERFLOW,
&pd->port_flag);
set_bit(
IPATH_PORT_WAITING_OVERFLOW,
&pd->int_flag);
wake_up_interruptible(
&pd->port_wait);
}
/* flush hdrqfull so that poll() sees it */
wmb();
wake_up_interruptible(&pd->port_wait);
}
}
}
......@@ -960,6 +969,8 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
int i;
int rcvdint = 0;
/* test_bit below needs this... */
rmb();
portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
dd->ipath_i_rcvavail_mask)
| ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
......@@ -967,22 +978,15 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
for (i = 1; i < dd->ipath_cfgports; i++) {
struct ipath_portdata *pd = dd->ipath_pd[i];
if (portr & (1 << i) && pd && pd->port_cnt) {
if (test_bit(IPATH_PORT_WAITING_RCV,
&pd->port_flag)) {
clear_bit(IPATH_PORT_WAITING_RCV,
&pd->port_flag);
set_bit(IPATH_PORT_WAITING_RCV,
&pd->int_flag);
if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
&pd->port_flag)) {
clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
&dd->ipath_rcvctrl);
wake_up_interruptible(&pd->port_wait);
rcvdint = 1;
} else if (test_bit(IPATH_PORT_WAITING_URG,
&pd->port_flag)) {
clear_bit(IPATH_PORT_WAITING_URG,
&pd->port_flag);
set_bit(IPATH_PORT_WAITING_URG,
&pd->int_flag);
} else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
&pd->port_flag)) {
pd->port_urgent++;
wake_up_interruptible(&pd->port_wait);
}
}
......@@ -1085,8 +1089,8 @@ irqreturn_t ipath_intr(int irq, void *data)
* GPIO_2 indicates (on some HT4xx boards) that a packet
* has arrived for Port 0. Checking for this
* is controlled by flag IPATH_GPIO_INTR.
* GPIO_3..5 on IBA6120 Rev2 chips indicate errors
* that we need to count. Checking for this
* GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
* errors that we need to count. Checking for this
* is controlled by flag IPATH_GPIO_ERRINTRS.
*/
u32 gpiostatus;
......@@ -1137,10 +1141,8 @@ irqreturn_t ipath_intr(int irq, void *data)
/*
* Some unexpected bits remain. If they could have
* caused the interrupt, complain and clear.
* MEA: this is almost certainly non-ideal.
* we should look into auto-disable of unexpected
* GPIO interrupts, possibly on a "three strikes"
* basis.
* To avoid repetition of this condition, also clear
* the mask. It is almost certainly due to error.
*/
const u32 mask = (u32) dd->ipath_gpio_mask;
......@@ -1148,6 +1150,10 @@ irqreturn_t ipath_intr(int irq, void *data)
ipath_dbg("Unexpected GPIO IRQ bits %x\n",
gpiostatus & mask);
to_clear |= (gpiostatus & mask);
dd->ipath_gpio_mask &= ~(gpiostatus & mask);
ipath_write_kreg(dd,
dd->ipath_kregs->kr_gpio_mask,
dd->ipath_gpio_mask);
}
}
if (to_clear) {
......
......@@ -42,6 +42,7 @@
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <asm/io.h>
#include <rdma/ib_verbs.h>
#include "ipath_common.h"
#include "ipath_debug.h"
......@@ -139,6 +140,12 @@ struct ipath_portdata {
u32 port_pionowait;
/* total number of rcvhdrqfull errors */
u32 port_hdrqfull;
/* saved total number of rcvhdrqfull errors for poll edge trigger */
u32 port_hdrqfull_poll;
/* total number of polled urgent packets */
u32 port_urgent;
/* saved total number of polled urgent packets for poll edge trigger */
u32 port_urgent_poll;
/* pid of process using this port */
pid_t port_pid;
/* same size as task_struct .comm[] */
......@@ -724,6 +731,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
#define IPATH_LINKACTIVE 0x200
/* link current state is unknown */
#define IPATH_LINKUNK 0x400
/* Write combining flush needed for PIO */
#define IPATH_PIO_FLUSH_WC 0x1000
/* no IB cable, or no device on IB cable */
#define IPATH_NOCABLE 0x4000
/* Supports port zero per packet receive interrupts via
......@@ -755,8 +764,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
#define IPATH_PORT_MASTER_UNINIT 4
/* waiting for an urgent packet to arrive */
#define IPATH_PORT_WAITING_URG 5
/* waiting for a header overflow */
#define IPATH_PORT_WAITING_OVERFLOW 6
/* free up any allocated data at closes */
void ipath_free_data(struct ipath_portdata *dd);
......@@ -769,6 +776,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
int ipath_update_eeprom_log(struct ipath_devdata *dd);
void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
/*
* Set LED override, only the two LSBs have "public" meaning, but
......
......@@ -245,7 +245,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
/* Only return the mkey if the protection field allows it. */
if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
(dev->mkeyprot_resv_lmc >> 6) == 0)
dev->mkeyprot == 0)
pip->mkey = dev->mkey;
pip->gid_prefix = dev->gid_prefix;
lid = dev->dd->ipath_lid;
......@@ -264,7 +264,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
pip->portphysstate_linkdown =
(ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
(get_linkdowndefaultstate(dev->dd) ? 1 : 2);
pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dev->dd->ipath_lmc;
pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */
switch (dev->dd->ipath_ibmtu) {
case 4096:
......@@ -401,7 +401,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
struct ib_port_info *pip = (struct ib_port_info *)smp->data;
struct ib_event event;
struct ipath_ibdev *dev;
u32 flags;
struct ipath_devdata *dd;
char clientrereg = 0;
u16 lid, smlid;
u8 lwe;
......@@ -415,6 +415,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
goto err;
dev = to_idev(ibdev);
dd = dev->dd;
event.device = ibdev;
event.element.port_num = port;
......@@ -423,11 +424,12 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
lid = be16_to_cpu(pip->lid);
if (lid != dev->dd->ipath_lid) {
if (dd->ipath_lid != lid ||
dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) {
/* Must be a valid unicast LID address. */
if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
goto err;
ipath_set_lid(dev->dd, lid, pip->mkeyprot_resv_lmc & 7);
ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7);
event.event = IB_EVENT_LID_CHANGE;
ib_dispatch_event(&event);
}
......@@ -461,18 +463,18 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
case 0: /* NOP */
break;
case 1: /* SLEEP */
if (set_linkdowndefaultstate(dev->dd, 1))
if (set_linkdowndefaultstate(dd, 1))
goto err;
break;
case 2: /* POLL */
if (set_linkdowndefaultstate(dev->dd, 0))
if (set_linkdowndefaultstate(dd, 0))
goto err;
break;
default:
goto err;
}
dev->mkeyprot_resv_lmc = pip->mkeyprot_resv_lmc;
dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
dev->vl_high_limit = pip->vl_high_limit;
switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
......@@ -495,7 +497,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
/* XXX We have already partially updated our state! */
goto err;
}
ipath_set_mtu(dev->dd, mtu);
ipath_set_mtu(dd, mtu);
dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
......@@ -511,16 +513,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
* later.
*/
if (pip->pkey_violations == 0)
dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd);
dev->z_pkey_violations = ipath_get_cr_errpkey(dd);
if (pip->qkey_violations == 0)
dev->qkey_violations = 0;
ore = pip->localphyerrors_overrunerrors;
if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF))
if (set_phyerrthreshold(dd, (ore >> 4) & 0xF))
goto err;
if (set_overrunthreshold(dev->dd, (ore & 0xF)))
if (set_overrunthreshold(dd, (ore & 0xF)))
goto err;
dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
......@@ -538,7 +540,6 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
* is down or is being set to down.
*/
state = pip->linkspeed_portstate & 0xF;
flags = dev->dd->ipath_flags;
lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
goto err;
......@@ -554,7 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
/* FALLTHROUGH */
case IB_PORT_DOWN:
if (lstate == 0)
if (get_linkdowndefaultstate(dev->dd))
if (get_linkdowndefaultstate(dd))
lstate = IPATH_IB_LINKDOWN_SLEEP;
else
lstate = IPATH_IB_LINKDOWN;
......@@ -566,27 +567,13 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
lstate = IPATH_IB_LINKDOWN_DISABLE;
else
goto err;
ipath_set_linkstate(dev->dd, lstate);
if (flags & IPATH_LINKACTIVE) {
event.event = IB_EVENT_PORT_ERR;
ib_dispatch_event(&event);
}
ipath_set_linkstate(dd, lstate);
break;
case IB_PORT_ARMED:
if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
break;
ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
if (flags & IPATH_LINKACTIVE) {
event.event = IB_EVENT_PORT_ERR;
ib_dispatch_event(&event);
}
ipath_set_linkstate(dd, IPATH_IB_LINKARM);
break;
case IB_PORT_ACTIVE:
if (!(flags & IPATH_LINKARMED))
break;
ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
event.event = IB_EVENT_PORT_ACTIVE;
ib_dispatch_event(&event);
ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
break;
default:
/* XXX We have already partially updated our state! */
......@@ -1350,7 +1337,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
if (dev->mkey_lease_timeout && jiffies >= dev->mkey_lease_timeout) {
/* Clear timeout and mkey protection field. */
dev->mkey_lease_timeout = 0;
dev->mkeyprot_resv_lmc &= 0x3F;
dev->mkeyprot = 0;
}
/*
......@@ -1361,7 +1348,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
dev->mkey != smp->mkey &&
(smp->method == IB_MGMT_METHOD_SET ||
(smp->method == IB_MGMT_METHOD_GET &&
(dev->mkeyprot_resv_lmc >> 7) != 0))) {
dev->mkeyprot >= 2))) {
if (dev->mkey_violations != 0xFFFF)
++dev->mkey_violations;
if (dev->mkey_lease_timeout ||
......
......@@ -338,6 +338,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->s_busy = 0;
qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
qp->s_hdrwords = 0;
qp->s_wqe = NULL;
qp->s_psn = 0;
qp->r_psn = 0;
qp->r_msn = 0;
......@@ -376,13 +377,15 @@ static void ipath_reset_qp(struct ipath_qp *qp)
* @err: the receive completion error to signal if a RWQE is active
*
* Flushes both send and receive work queues.
* Returns true if last WQE event should be generated.
* The QP s_lock should be held and interrupts disabled.
*/
void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
int ret = 0;
ipath_dbg("QP%d/%d in error state\n",
qp->ibqp.qp_num, qp->remote_qpn);
......@@ -453,7 +456,10 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
wq->tail = tail;
spin_unlock(&qp->r_rq.lock);
}
} else if (qp->ibqp.event_handler)
ret = 1;
return ret;
}
/**
......@@ -472,6 +478,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ipath_qp *qp = to_iqp(ibqp);
enum ib_qp_state cur_state, new_state;
unsigned long flags;
int lastwqe = 0;
int ret;
spin_lock_irqsave(&qp->s_lock, flags);
......@@ -531,7 +538,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
break;
case IB_QPS_ERR:
ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break;
default:
......@@ -590,6 +597,14 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->state = new_state;
spin_unlock_irqrestore(&qp->s_lock, flags);
if (lastwqe) {
struct ib_event ev;
ev.device = qp->ibqp.device;
ev.element.qp = &qp->ibqp;
ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}
ret = 0;
goto bail;
......@@ -751,6 +766,9 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
switch (init_attr->qp_type) {
case IB_QPT_UC:
case IB_QPT_RC:
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
sz = sizeof(struct ipath_sge) *
init_attr->cap.max_send_sge +
sizeof(struct ipath_swqe);
......@@ -759,10 +777,6 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
ret = ERR_PTR(-ENOMEM);
goto bail;
}
/* FALLTHROUGH */
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
sz = sizeof(*qp);
if (init_attr->srq) {
struct ipath_srq *srq = to_isrq(init_attr->srq);
......@@ -805,8 +819,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
init_waitqueue_head(&qp->wait);
tasklet_init(&qp->s_task, ipath_do_ruc_send,
(unsigned long)qp);
tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
INIT_LIST_HEAD(&qp->piowait);
INIT_LIST_HEAD(&qp->timerwait);
qp->state = IB_QPS_RESET;
......
......@@ -81,9 +81,8 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
* Note that we are in the responder's side of the QP context.
* Note the QP s_lock must be held.
*/
static int ipath_make_rc_ack(struct ipath_qp *qp,
struct ipath_other_headers *ohdr,
u32 pmtu, u32 *bth0p, u32 *bth2p)
static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
struct ipath_other_headers *ohdr, u32 pmtu)
{
struct ipath_ack_entry *e;
u32 hwords;
......@@ -192,8 +191,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
}
qp->s_hdrwords = hwords;
qp->s_cur_size = len;
*bth0p = bth0 | (1 << 22); /* Set M bit */
*bth2p = bth2;
ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
return 1;
bail:
......@@ -203,32 +201,39 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
/**
* ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP
* @ohdr: a pointer to the IB header being constructed
* @pmtu: the path MTU
* @bth0p: pointer to the BTH opcode word
* @bth2p: pointer to the BTH PSN word
*
* Return 1 if constructed; otherwise, return 0.
* Note the QP s_lock must be held and interrupts disabled.
*/
int ipath_make_rc_req(struct ipath_qp *qp,
struct ipath_other_headers *ohdr,
u32 pmtu, u32 *bth0p, u32 *bth2p)
int ipath_make_rc_req(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_other_headers *ohdr;
struct ipath_sge_state *ss;
struct ipath_swqe *wqe;
u32 hwords;
u32 len;
u32 bth0;
u32 bth2;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
char newreq;
unsigned long flags;
int ret = 0;
ohdr = &qp->s_hdr.u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
ohdr = &qp->s_hdr.u.l.oth;
/*
* The lock is needed to synchronize between the sending tasklet,
* the receive interrupt handler, and timeout resends.
*/
spin_lock_irqsave(&qp->s_lock, flags);
/* Sending responses has higher priority over sending requests. */
if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
(qp->s_flags & IPATH_S_ACK_PENDING) ||
qp->s_ack_state != OP(ACKNOWLEDGE)) &&
ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
ipath_make_rc_ack(dev, qp, ohdr, pmtu))
goto done;
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
......@@ -560,13 +565,12 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_hdrwords = hwords;
qp->s_cur_sge = ss;
qp->s_cur_size = len;
*bth0p = bth0 | (qp->s_state << 24);
*bth2p = bth2;
ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
return 1;
ret = 1;
bail:
return 0;
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
/**
......@@ -627,7 +631,7 @@ static void send_rc_ack(struct ipath_qp *qp)
/*
* If we can send the ACK, clear the ACK state.
*/
if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
if (ipath_verbs_send(qp, &hdr, hwords, NULL, 0) == 0) {
dev->n_unicast_xmit++;
goto done;
}
......@@ -757,7 +761,9 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
wc->vendor_err = 0;
wc->byte_len = 0;
wc->qp = &qp->ibqp;
wc->imm_data = 0;
wc->src_qp = qp->remote_qpn;
wc->wc_flags = 0;
wc->pkey_index = 0;
wc->slid = qp->remote_ah_attr.dlid;
wc->sl = qp->remote_ah_attr.sl;
......@@ -1041,7 +1047,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
wc.vendor_err = 0;
wc.byte_len = 0;
wc.qp = &qp->ibqp;
wc.imm_data = 0;
wc.src_qp = qp->remote_qpn;
wc.wc_flags = 0;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
......@@ -1453,6 +1461,19 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
qp->r_ack_psn = qp->r_psn - 1;
goto send_ack;
}
/*
* Try to send a simple ACK to work around a Mellanox bug
* which doesn't accept a RDMA read response or atomic
* response as an ACK for earlier SENDs or RDMA writes.
*/
if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
!(qp->s_flags & IPATH_S_ACK_PENDING) &&
qp->s_ack_state == OP(ACKNOWLEDGE)) {
spin_unlock_irqrestore(&qp->s_lock, flags);
qp->r_nak_state = 0;
qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
goto send_ack;
}
/*
* Resend the RDMA read or atomic op which
* ACKs this duplicate request.
......@@ -1476,11 +1497,21 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
{
unsigned long flags;
int lastwqe;
spin_lock_irqsave(&qp->s_lock, flags);
qp->state = IB_QPS_ERR;
ipath_error_qp(qp, err);
lastwqe = ipath_error_qp(qp, err);
spin_unlock_irqrestore(&qp->s_lock, flags);
if (lastwqe) {
struct ib_event ev;
ev.device = qp->ibqp.device;
ev.element.qp = &qp->ibqp;
ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}
}
static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
......
......@@ -31,6 +31,8 @@
* SOFTWARE.
*/
#include <linux/spinlock.h>
#include "ipath_verbs.h"
#include "ipath_kernel.h"
......@@ -106,27 +108,30 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
spin_unlock_irqrestore(&dev->pending_lock, flags);
}
static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
/**
* ipath_init_sge - Validate a RWQE and fill in the SGE state
* @qp: the QP
*
* Return 1 if OK.
*/
int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
u32 *lengthp, struct ipath_sge_state *ss)
{
int user = to_ipd(qp->ibqp.pd)->user;
int i, j, ret;
struct ib_wc wc;
qp->r_len = 0;
*lengthp = 0;
for (i = j = 0; i < wqe->num_sge; i++) {
if (wqe->sg_list[i].length == 0)
continue;
/* Check LKEY */
if ((user && wqe->sg_list[i].lkey == 0) ||
!ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i],
IB_ACCESS_LOCAL_WRITE))
if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
&wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
qp->r_len += wqe->sg_list[i].length;
*lengthp += wqe->sg_list[i].length;
j++;
}
qp->r_sge.sge = qp->r_sg_list[0];
qp->r_sge.sg_list = qp->r_sg_list + 1;
qp->r_sge.num_sge = j;
ss->num_sge = j;
ret = 1;
goto bail;
......@@ -172,6 +177,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
u32 tail;
int ret;
qp->r_sge.sg_list = qp->r_sg_list;
if (qp->ibqp.srq) {
srq = to_isrq(qp->ibqp.srq);
handler = srq->ibsrq.event_handler;
......@@ -199,7 +206,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
wqe = get_rwqe_ptr(rq, tail);
if (++tail >= rq->size)
tail = 0;
} while (!wr_id_only && !init_sge(qp, wqe));
} while (!wr_id_only && !ipath_init_sge(qp, wqe, &qp->r_len,
&qp->r_sge));
qp->r_wr_id = wqe->wr_id;
wq->tail = tail;
......@@ -239,9 +247,9 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
/**
* ipath_ruc_loopback - handle UC and RC loopback requests
* @sqp: the loopback QP
* @sqp: the sending QP
*
* This is called from ipath_do_uc_send() or ipath_do_rc_send() to
* This is called from ipath_do_send() to
* forward a WQE addressed to the same HCA.
* Note that although we are single threaded due to the tasklet, we still
* have to protect against post_send(). We don't have to worry about
......@@ -450,40 +458,18 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.vendor_err = 0;
wc.byte_len = wqe->length;
wc.qp = &sqp->ibqp;
wc.src_qp = 0;
wc.pkey_index = 0;
wc.slid = 0;
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
ipath_cq_enter(to_icq(sqp->ibqp.send_cq), &wc, 0);
}
/* Update s_last now that we are finished with the SWQE */
spin_lock_irqsave(&sqp->s_lock, flags);
if (++sqp->s_last >= sqp->s_size)
sqp->s_last = 0;
spin_unlock_irqrestore(&sqp->s_lock, flags);
ipath_send_complete(sqp, wqe, IB_WC_SUCCESS);
goto again;
done:
......@@ -491,13 +477,11 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
wake_up(&qp->wait);
}
static int want_buffer(struct ipath_devdata *dd)
static void want_buffer(struct ipath_devdata *dd)
{
set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
return 0;
}
/**
......@@ -507,14 +491,11 @@ static int want_buffer(struct ipath_devdata *dd)
*
* Called when we run out of PIO buffers.
*/
static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
static void ipath_no_bufs_available(struct ipath_qp *qp,
struct ipath_ibdev *dev)
{
unsigned long flags;
spin_lock_irqsave(&dev->pending_lock, flags);
if (list_empty(&qp->piowait))
list_add_tail(&qp->piowait, &dev->piowait);
spin_unlock_irqrestore(&dev->pending_lock, flags);
/*
* Note that as soon as want_buffer() is called and
* possibly before it returns, ipath_ib_piobufavail()
......@@ -524,100 +505,13 @@ static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev
* We leave the busy flag set so that another post send doesn't
* try to put the same QP on the piowait list again.
*/
spin_lock_irqsave(&dev->pending_lock, flags);
list_add_tail(&qp->piowait, &dev->piowait);
spin_unlock_irqrestore(&dev->pending_lock, flags);
want_buffer(dev->dd);
dev->n_piowait++;
}
/**
 * ipath_post_ruc_send - post RC and UC sends
 * @qp: the QP to post on
 * @wr: the work request to send
 *
 * Validates the request, copies it into the send queue slot at s_head
 * while holding the QP s_lock, publishes the slot by advancing s_head,
 * and then calls ipath_do_ruc_send() directly.
 *
 * Returns 0 on success, -ENOMEM if the request has more SGEs than the
 * QP allows, and -EINVAL for every other rejection (bad opcode, bad
 * atomic buffer, full send queue, bad LKEY).
 */
int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
	struct ipath_swqe *wqe;
	unsigned long flags;
	u32 next;
	int access;
	int src, dst;

	/*
	 * UC supports neither RDMA reads nor atomics.  On the other QP
	 * types reject undefined opcodes, atomics whose first SGE cannot
	 * hold an aligned 64-bit result, and reads/atomics when the QP
	 * has no outstanding read/atomic capacity.
	 */
	if (qp->ibqp.qp_type == IB_QPT_UC) {
		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
			return -EINVAL;
	} else {
		if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
			return -EINVAL;
		if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
		    (wr->num_sge == 0 ||
		     wr->sg_list[0].length < sizeof(u64) ||
		     wr->sg_list[0].addr & (sizeof(u64) - 1)))
			return -EINVAL;
		if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
			return -EINVAL;
	}

	/* A request with no SGEs at all is legal; only the upper bound is checked. */
	if (wr->num_sge > qp->s_max_sge)
		return -ENOMEM;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* The queue is full when advancing the head would reach s_last. */
	next = qp->s_head + 1;
	if (next >= qp->s_size)
		next = 0;
	if (next == qp->s_last)
		goto inval_unlock;

	wqe = get_swqe_ptr(qp, qp->s_head);
	wqe->wr = *wr;
	wqe->ssn = qp->s_ssn++;
	wqe->sg_list[0].mr = NULL;
	wqe->sg_list[0].vaddr = NULL;
	wqe->sg_list[0].length = 0;
	wqe->sg_list[0].sge_length = 0;
	wqe->length = 0;

	/* Reads and atomics write into the local buffers. */
	access = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0;

	/* Copy the SGEs, validating each LKEY and dropping empty ones. */
	for (src = 0, dst = 0; src < wr->num_sge; src++) {
		if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[src].lkey == 0)
			goto inval_unlock;
		if (wr->sg_list[src].length == 0)
			continue;
		if (!ipath_lkey_ok(qp, &wqe->sg_list[dst], &wr->sg_list[src],
				   access))
			goto inval_unlock;
		wqe->length += wr->sg_list[src].length;
		dst++;
	}
	wqe->wr.num_sge = dst;
	qp->s_head = next;
	spin_unlock_irqrestore(&qp->s_lock, flags);

	ipath_do_ruc_send((unsigned long) qp);
	return 0;

inval_unlock:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return -EINVAL;
}
/**
* ipath_make_grh - construct a GRH header
* @dev: a pointer to the ipath device
......@@ -648,39 +542,66 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
return sizeof(struct ib_grh) / sizeof(u32);
}
/**
 * ipath_make_ruc_header - fill in the LRH and BTH of the QP's send header
 * @dev: the ipath device the QP belongs to
 * @qp: the QP whose s_hdr is being built
 * @ohdr: the BTH location inside qp->s_hdr
 * @bth0: the BTH opcode word; P_Key, pad count and M bit are OR-ed in here
 * @bth2: the BTH PSN word
 *
 * qp->s_cur_size and qp->s_hdrwords must already describe the packet.
 * s_hdrwords is increased here when a GRH has to be inserted.
 */
void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
			   struct ipath_other_headers *ohdr,
			   u32 bth0, u32 bth2)
{
	/* Bytes of padding needed to round the payload up to a whole word. */
	u32 pad = -qp->s_cur_size & 3;
	u32 payload_words = (qp->s_cur_size + pad) >> 2;
	u16 lrh0;

	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		/* Global route: build the GRH and account for its words. */
		qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
						 &qp->remote_ah_attr.grh,
						 qp->s_hdrwords,
						 payload_words);
		lrh0 = IPATH_LRH_GRH;
	} else {
		lrh0 = IPATH_LRH_BTH;
	}
	lrh0 |= qp->remote_ah_attr.sl << 4;

	/* Local route header. */
	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + payload_words +
				       SIZE_OF_CRC);
	qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);

	/* Base transport header: P_Key, pad count in bits 20-21, M bit (22). */
	bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
	bth0 |= pad << 20;
	bth0 |= 1 << 22;
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(bth2);
}
/**
* ipath_do_ruc_send - perform a send on an RC or UC QP
* ipath_do_send - perform a send on a QP
* @data: contains a pointer to the QP
*
* Process entries in the send work queue until credit or queue is
* exhausted. Only allow one CPU to send a packet per QP (tasklet).
* Otherwise, after we drop the QP s_lock, two threads could send
* packets out of order.
* Otherwise, two threads could send packets out of order.
*/
void ipath_do_ruc_send(unsigned long data)
void ipath_do_send(unsigned long data)
{
struct ipath_qp *qp = (struct ipath_qp *)data;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
unsigned long flags;
u16 lrh0;
u32 nwords;
u32 extra_bytes;
u32 bth0;
u32 bth2;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
struct ipath_other_headers *ohdr;
int (*make_req)(struct ipath_qp *qp);
if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
goto bail;
if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
if ((qp->ibqp.qp_type == IB_QPT_RC ||
qp->ibqp.qp_type == IB_QPT_UC) &&
qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
ipath_ruc_loopback(qp);
goto clear;
}
ohdr = &qp->s_hdr.u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
ohdr = &qp->s_hdr.u.l.oth;
if (qp->ibqp.qp_type == IB_QPT_RC)
make_req = ipath_make_rc_req;
else if (qp->ibqp.qp_type == IB_QPT_UC)
make_req = ipath_make_uc_req;
else
make_req = ipath_make_ud_req;
again:
/* Check for a constructed packet to be sent. */
......@@ -689,9 +610,8 @@ void ipath_do_ruc_send(unsigned long data)
* If no PIO bufs are available, return. An interrupt will
* call ipath_ib_piobufavail() when one is available.
*/
if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
(u32 *) &qp->s_hdr, qp->s_cur_size,
qp->s_cur_sge)) {
if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
qp->s_cur_sge, qp->s_cur_size)) {
ipath_no_bufs_available(qp, dev);
goto bail;
}
......@@ -700,54 +620,42 @@ void ipath_do_ruc_send(unsigned long data)
qp->s_hdrwords = 0;
}
/*
* The lock is needed to synchronize between setting
* qp->s_ack_state, resend timer, and post_send().
*/
spin_lock_irqsave(&qp->s_lock, flags);
if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
/*
* Clear the busy bit before unlocking to avoid races with
* adding new work queue items and then failing to process
* them.
*/
clear_bit(IPATH_S_BUSY, &qp->s_busy);
spin_unlock_irqrestore(&qp->s_lock, flags);
goto bail;
}
if (make_req(qp))
goto again;
clear:
clear_bit(IPATH_S_BUSY, &qp->s_busy);
bail:;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
enum ib_wc_status status)
{
u32 last = qp->s_last;
/* Construct the header. */
extra_bytes = (4 - qp->s_cur_size) & 3;
nwords = (qp->s_cur_size + extra_bytes) >> 2;
lrh0 = IPATH_LRH_BTH;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
&qp->remote_ah_attr.grh,
qp->s_hdrwords, nwords);
lrh0 = IPATH_LRH_GRH;
}
lrh0 |= qp->remote_ah_attr.sl << 4;
qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
SIZE_OF_CRC);
qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(bth2);
if (++last == qp->s_size)
last = 0;
qp->s_last = last;
/* Check for more work to do. */
goto again;
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
status != IB_WC_SUCCESS) {
struct ib_wc wc;
clear:
clear_bit(IPATH_S_BUSY, &qp->s_busy);
bail:
return;
wc.wr_id = wqe->wr.wr_id;
wc.status = status;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.vendor_err = 0;
wc.byte_len = wqe->length;
wc.imm_data = 0;
wc.qp = &qp->ibqp;
wc.src_qp = 0;
wc.wc_flags = 0;
wc.pkey_index = 0;
wc.slid = 0;
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
}
}
......@@ -55,7 +55,6 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
u64 val64;
unsigned long t0, t1;
u64 ret;
unsigned long flags;
t0 = jiffies;
/* If fast increment counters are only 32 bits, snapshot them,
......@@ -92,18 +91,12 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
if (creg == dd->ipath_cregs->cr_wordsendcnt) {
if (val != dd->ipath_lastsword) {
dd->ipath_sword += val - dd->ipath_lastsword;
spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
dd->ipath_traffic_wds += val - dd->ipath_lastsword;
spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
dd->ipath_lastsword = val;
}
val64 = dd->ipath_sword;
} else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
if (val != dd->ipath_lastrword) {
dd->ipath_rword += val - dd->ipath_lastrword;
spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
dd->ipath_traffic_wds += val - dd->ipath_lastrword;
spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
dd->ipath_lastrword = val;
}
val64 = dd->ipath_rword;
......@@ -247,6 +240,7 @@ void ipath_get_faststats(unsigned long opaque)
u32 val;
static unsigned cnt;
unsigned long flags;
u64 traffic_wds;
/*
* don't access the chip while running diags, or memory diags can
......@@ -262,12 +256,13 @@ void ipath_get_faststats(unsigned long opaque)
* exceeding a threshold, so we need to check the word-counts
* even if they are 64-bit.
*/
ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
if (dd->ipath_traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
traffic_wds -= dd->ipath_traffic_wds;
dd->ipath_traffic_wds += traffic_wds;
if (traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
atomic_add(5, &dd->ipath_active_time); /* S/B #define */
dd->ipath_traffic_wds = 0;
spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
......
......@@ -163,6 +163,42 @@ static ssize_t show_boardversion(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
}
/*
 * Read handler for the "lmc" device attribute: prints the LMC value
 * stored in the device data as an unsigned decimal plus a newline.
 */
static ssize_t show_lmc(struct device *dev,
			struct device_attribute *attr,
			char *buf)
{
	struct ipath_devdata *devdata = dev_get_drvdata(dev);
	ssize_t len;

	len = scnprintf(buf, PAGE_SIZE, "%u\n", devdata->ipath_lmc);

	return len;
}
/*
 * Write handler for the "lmc" device attribute.
 *
 * Parses an unsigned short from buf and, if it is no larger than 7,
 * re-applies the current LID with the new LMC via ipath_set_lid().
 * Any rejected input is logged with ipath_dev_err().
 *
 * Returns whatever ipath_parse_ushort() returned when the value is
 * accepted, its negative result when parsing fails, and -EINVAL when
 * the parsed value is out of range.  The count argument is not used.
 */
static ssize_t store_lmc(struct device *dev,
			 struct device_attribute *attr,
			 const char *buf,
			 size_t count)
{
	struct ipath_devdata *devdata = dev_get_drvdata(dev);
	u16 lmc = 0;
	int status;

	status = ipath_parse_ushort(buf, &lmc);
	if (status >= 0 && lmc > 7)
		status = -EINVAL;

	if (status < 0)
		ipath_dev_err(devdata, "attempt to set invalid LMC %u\n", lmc);
	else
		ipath_set_lid(devdata, devdata->ipath_lid, lmc);

	return status;
}
static ssize_t show_lid(struct device *dev,
struct device_attribute *attr,
char *buf)
......@@ -190,7 +226,7 @@ static ssize_t store_lid(struct device *dev,
goto invalid;
}
ipath_set_lid(dd, lid, 0);
ipath_set_lid(dd, lid, dd->ipath_lmc);
goto bail;
invalid:
......@@ -648,6 +684,7 @@ static struct attribute_group driver_attr_group = {
};
static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc);
static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
......@@ -667,6 +704,7 @@ static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
static struct attribute *dev_attributes[] = {
&dev_attr_guid.attr,
&dev_attr_lmc.attr,
&dev_attr_lid.attr,
&dev_attr_link_state.attr,
&dev_attr_mlid.attr,
......
......@@ -37,72 +37,40 @@
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_UC_##x
/*
 * Retire the send WQE at s_last: advance s_last (wrapping at s_size)
 * and, when the QP signals every request or this request asked for a
 * completion, fill in *wc as a successful send completion and add it
 * to the send CQ.
 */
static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
			       struct ib_wc *wc)
{
	qp->s_last++;
	if (qp->s_last == qp->s_size)
		qp->s_last = 0;

	/* Selective signalling is on and this request is unsignalled. */
	if ((qp->s_flags & IPATH_S_SIGNAL_REQ_WR) &&
	    !(wqe->wr.send_flags & IB_SEND_SIGNALED))
		return;

	wc->wr_id = wqe->wr.wr_id;
	wc->status = IB_WC_SUCCESS;
	wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
	wc->vendor_err = 0;
	wc->byte_len = wqe->length;
	wc->qp = &qp->ibqp;
	wc->src_qp = qp->remote_qpn;
	wc->pkey_index = 0;
	wc->slid = qp->remote_ah_attr.dlid;
	wc->sl = qp->remote_ah_attr.sl;
	wc->dlid_path_bits = 0;
	wc->port_num = 0;
	ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
}
/**
* ipath_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP
* @ohdr: a pointer to the IB header being constructed
* @pmtu: the path MTU
* @bth0p: pointer to the BTH opcode word
* @bth2p: pointer to the BTH PSN word
*
* Return 1 if constructed; otherwise, return 0.
* Note the QP s_lock must be held and interrupts disabled.
*/
int ipath_make_uc_req(struct ipath_qp *qp,
struct ipath_other_headers *ohdr,
u32 pmtu, u32 *bth0p, u32 *bth2p)
int ipath_make_uc_req(struct ipath_qp *qp)
{
struct ipath_other_headers *ohdr;
struct ipath_swqe *wqe;
u32 hwords;
u32 bth0;
u32 len;
struct ib_wc wc;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
int ret = 0;
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
goto done;
ohdr = &qp->s_hdr.u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
ohdr = &qp->s_hdr.u.l.oth;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
bth0 = 1 << 22; /* Set M bit */
/* Get the next send request. */
wqe = get_swqe_ptr(qp, qp->s_last);
wqe = get_swqe_ptr(qp, qp->s_cur);
qp->s_wqe = NULL;
switch (qp->s_state) {
default:
/*
* Signal the completion of the last send
* (if there is one).
*/
if (qp->s_last != qp->s_tail) {
complete_last_send(qp, wqe, &wc);
wqe = get_swqe_ptr(qp, qp->s_last);
}
/* Check if send work queue is empty. */
if (qp->s_tail == qp->s_head)
if (qp->s_cur == qp->s_head)
goto done;
/*
* Start a new request.
......@@ -131,6 +99,9 @@ int ipath_make_uc_req(struct ipath_qp *qp,
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
case IB_WR_RDMA_WRITE:
......@@ -157,13 +128,14 @@ int ipath_make_uc_req(struct ipath_qp *qp,
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
}
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
default:
goto done;
}
if (++qp->s_tail >= qp->s_size)
qp->s_tail = 0;
break;
case OP(SEND_FIRST):
......@@ -185,6 +157,9 @@ int ipath_make_uc_req(struct ipath_qp *qp,
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
case OP(RDMA_WRITE_FIRST):
......@@ -207,18 +182,22 @@ int ipath_make_uc_req(struct ipath_qp *qp,
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
}
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
}
qp->s_len -= len;
qp->s_hdrwords = hwords;
qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len;
*bth0p = bth0 | (qp->s_state << 24);
*bth2p = qp->s_next_psn++ & IPATH_PSN_MASK;
return 1;
ipath_make_ruc_header(to_idev(qp->ibqp.device),
qp, ohdr, bth0 | (qp->s_state << 24),
qp->s_next_psn++ & IPATH_PSN_MASK);
ret = 1;
done:
return 0;
return ret;
}
/**
......@@ -485,6 +464,16 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
rdma_last_imm:
if (header_in_data) {
wc.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else {
/* Immediate data comes after BTH */
wc.imm_data = ohdr->u.imm_data;
}
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
......@@ -505,16 +494,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
dev->n_pkt_drops++;
goto done;
}
if (header_in_data) {
wc.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else {
/* Immediate data comes after BTH */
wc.imm_data = ohdr->u.imm_data;
}
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
wc.byte_len = 0;
wc.byte_len = qp->r_len;
goto last_imm;
case OP(RDMA_WRITE_LAST):
......
......@@ -42,6 +42,8 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
#include "ipath_kernel.h"
#define IPATH_MAX_RDMA_ATOMIC 4
#define QPN_MAX (1 << 24)
......@@ -59,6 +61,7 @@
*/
#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
/* AETH NAK opcode values */
#define IB_RNR_NAK 0x20
#define IB_NAK_PSN_ERROR 0x60
#define IB_NAK_INVALID_REQUEST 0x61
......@@ -66,6 +69,7 @@
#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
#define IB_NAK_INVALID_RD_REQUEST 0x64
/* Flags for checking QP state (see ib_ipath_state_ops[]) */
#define IPATH_POST_SEND_OK 0x01
#define IPATH_POST_RECV_OK 0x02
#define IPATH_PROCESS_RECV_OK 0x04
......@@ -187,7 +191,11 @@ struct ipath_mmap_info {
struct ipath_cq_wc {
u32 head; /* index of next entry to fill */
u32 tail; /* index of next ib_poll_cq() entry */
struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
union {
/* these are actually size ibcq.cqe + 1 */
struct ib_uverbs_wc uqueue[0];
struct ib_wc kqueue[0];
};
};
/*
......@@ -239,7 +247,7 @@ struct ipath_mregion {
*/
struct ipath_sge {
struct ipath_mregion *mr;
void *vaddr; /* current pointer into the segment */
void *vaddr; /* kernel virtual address of segment */
u32 sge_length; /* length of the SGE */
u32 length; /* remaining length of the segment */
u16 m; /* current index: mr->map[m] */
......@@ -407,6 +415,7 @@ struct ipath_qp {
u32 s_ssn; /* SSN of tail entry */
u32 s_lsn; /* limit sequence number (credit) */
struct ipath_swqe *s_wq; /* send work queue */
struct ipath_swqe *s_wqe;
struct ipath_rq r_rq; /* receive work queue */
struct ipath_sge r_sg_list[0]; /* verified SGEs */
};
......@@ -492,7 +501,7 @@ struct ipath_ibdev {
int ib_unit; /* This is the device number */
u16 sm_lid; /* in host order */
u8 sm_sl;
u8 mkeyprot_resv_lmc;
u8 mkeyprot;
/* non-zero when timer is set */
unsigned long mkey_lease_timeout;
......@@ -667,7 +676,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
int ipath_destroy_qp(struct ib_qp *ibqp);
void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
......@@ -683,8 +692,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
u32 *hdr, u32 len, struct ipath_sge_state *ss);
int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
u32 hdrwords, struct ipath_sge_state *ss, u32 len);
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
......@@ -692,8 +701,6 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr);
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
......@@ -733,6 +740,8 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
int ipath_destroy_srq(struct ib_srq *ibsrq);
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector,
......@@ -782,18 +791,28 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
void ipath_insert_rnr_queue(struct ipath_qp *qp);
int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
u32 *lengthp, struct ipath_sge_state *ss);
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
void ipath_do_ruc_send(unsigned long data);
void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
struct ipath_other_headers *ohdr,
u32 bth0, u32 bth2);
void ipath_do_send(unsigned long data);
void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
enum ib_wc_status status);
int ipath_make_rc_req(struct ipath_qp *qp);
int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
u32 pmtu, u32 *bth0p, u32 *bth2p);
int ipath_make_uc_req(struct ipath_qp *qp);
int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
u32 pmtu, u32 *bth0p, u32 *bth2p);
int ipath_make_ud_req(struct ipath_qp *qp);
int ipath_register_ib_device(struct ipath_devdata *);
......
......@@ -476,9 +476,48 @@ static int init_node_data(struct mlx4_ib_dev *dev)
return err;
}
/*
 * Read handler for the "hca_type" class device attribute: prints "MT"
 * followed by the decimal device field of the underlying pdev.
 */
static ssize_t show_hca(struct class_device *cdev, char *buf)
{
	struct mlx4_ib_dev *ibdev;

	ibdev = container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);

	return sprintf(buf, "MT%d\n", ibdev->dev->pdev->device);
}
/*
 * "fw_ver" attribute show callback: writes the firmware version into @buf
 * as three dot-separated decimal fields followed by a newline, and returns
 * the number of characters sprintf() produced.
 *
 * The fields are taken from caps.fw_ver: the value shifted right by 32,
 * then bits 16-31, then bits 0-15.
 */
static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
{
	struct mlx4_ib_dev *ibdev =
		container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);
	int ver_hi  = (int) (ibdev->dev->caps.fw_ver >> 32);
	/* The cast binds tighter than '&', so the mask applies to the int. */
	int ver_mid = (int) (ibdev->dev->caps.fw_ver >> 16) & 0xffff;
	int ver_lo  = (int) ibdev->dev->caps.fw_ver & 0xffff;

	return sprintf(buf, "%d.%d.%d\n", ver_hi, ver_mid, ver_lo);
}
static ssize_t show_rev(struct class_device *cdev, char *buf)
{
struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);
return sprintf(buf, "%x\n", dev->dev->rev_id);
}
static ssize_t show_board(struct class_device *cdev, char *buf)
{
struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);
return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id);
}
/*
 * Read-only class device attributes: each is created with S_IRUGO
 * permissions, a show callback and no store callback (NULL).
 */
static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
/*
 * Table of the attributes declared above.  mlx4_ib_add() walks it with
 * ARRAY_SIZE() and calls class_device_create_file() on every entry, so a
 * new attribute only needs to be declared and appended here.
 */
static struct class_device_attribute *mlx4_class_attributes[] = {
&class_device_attr_hw_rev,
&class_device_attr_fw_ver,
&class_device_attr_hca_type,
&class_device_attr_board_id
};
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
struct mlx4_ib_dev *ibdev;
int i;
ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
if (!ibdev) {
......@@ -568,6 +607,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
if (init_node_data(ibdev))
goto err_map;
......@@ -580,6 +624,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) {
if (class_device_create_file(&ibdev->ib_dev.class_dev,
mlx4_class_attributes[i]))
goto err_reg;
}
return ibdev;
err_reg:
......
......@@ -93,6 +93,11 @@ struct mlx4_ib_mr {
struct ib_umem *umem;
};
/*
 * mlx4 IB driver wrapper around an FMR.
 *
 * @ibfmr: the struct ib_fmr handed out to callers; to_mfmr() recovers the
 *         enclosing mlx4_ib_fmr from a pointer to this member via
 *         container_of(), so it must remain an embedded member.
 * @mfmr:  the struct mlx4_fmr state passed to the mlx4_fmr_*() and
 *         mlx4_map_phys_fmr() calls.
 */
struct mlx4_ib_fmr {
struct ib_fmr ibfmr;
struct mlx4_fmr mfmr;
};
struct mlx4_ib_wq {
u64 *wrid;
spinlock_t lock;
......@@ -199,6 +204,10 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
return container_of(ibmr, struct mlx4_ib_mr, ibmr);
}
/*
 * Map a struct ib_fmr pointer back to the struct mlx4_ib_fmr that embeds
 * it as its "ibfmr" member.
 */
static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
{
	struct mlx4_ib_fmr *outer;

	outer = container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
	return outer;
}
static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct mlx4_ib_qp, ibqp);
......@@ -284,6 +293,13 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
/*
 * FMR entry points.  mlx4_ib_add() installs them as the device's
 * alloc_fmr, map_phys_fmr, unmap_fmr and dealloc_fmr methods.
 */
struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr);
int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
u64 iova);
int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
return !!(ah->av.g_slid & 0x80);
......
......@@ -96,11 +96,10 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
pages[i++] = sg_dma_address(&chunk->page_list[j]) +
umem->page_size * k;
/*
* Be friendly to WRITE_MTT firmware
* command, and pass it chunks of
* appropriate size.
* Be friendly to mlx4_write_mtt() and
* pass it chunks of appropriate size.
*/
if (i == PAGE_SIZE / sizeof (u64) - 2) {
if (i == PAGE_SIZE / sizeof (u64)) {
err = mlx4_write_mtt(dev->dev, mtt, n,
i, pages);
if (err)
......@@ -182,3 +181,96 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
return 0;
}
/*
 * Allocate and enable an FMR on protection domain @pd.
 *
 * @pd:       protection domain; its device and PD number are used.
 * @acc:      access flags, translated with convert_access().
 * @fmr_attr: supplies max_pages, max_maps and page_shift.
 *
 * Returns a pointer to the embedded struct ib_fmr on success, with lkey
 * and rkey both set to the key of the underlying MR.  On failure returns
 * ERR_PTR(-ENOMEM) if the wrapper cannot be allocated, or ERR_PTR() of the
 * error reported by mlx4_fmr_alloc() / mlx4_mr_enable(); everything
 * acquired up to that point is released first.
 *
 * NOTE(review): the embedded MR is enabled directly through
 * mlx4_mr_enable(); confirm whether an FMR-specific enable step is
 * expected by the core driver.
 */
struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
				 struct ib_fmr_attr *fmr_attr)
{
	struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
	struct mlx4_ib_fmr *new_fmr;
	int ret;

	new_fmr = kmalloc(sizeof *new_fmr, GFP_KERNEL);
	if (!new_fmr)
		return ERR_PTR(-ENOMEM);

	ret = mlx4_fmr_alloc(ibdev->dev, to_mpd(pd)->pdn, convert_access(acc),
			     fmr_attr->max_pages, fmr_attr->max_maps,
			     fmr_attr->page_shift, &new_fmr->mfmr);
	if (ret)
		goto out_free;

	ret = mlx4_mr_enable(ibdev->dev, &new_fmr->mfmr.mr);
	if (ret) {
		/* Undo mlx4_fmr_alloc() before dropping the wrapper. */
		mlx4_mr_free(ibdev->dev, &new_fmr->mfmr.mr);
		goto out_free;
	}

	new_fmr->ibfmr.lkey = new_fmr->mfmr.mr.key;
	new_fmr->ibfmr.rkey = new_fmr->ibfmr.lkey;

	return &new_fmr->ibfmr;

out_free:
	kfree(new_fmr);
	return ERR_PTR(ret);
}
/*
 * Map @npages pages from @page_list into the FMR at I/O virtual address
 * @iova.
 *
 * Thin wrapper around mlx4_map_phys_fmr(): that call is handed pointers to
 * the FMR's lkey and rkey, and its return value is passed straight back to
 * the caller.
 */
int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			 int npages, u64 iova)
{
	struct mlx4_ib_fmr *mfmr_wrap = to_mfmr(ibfmr);
	/* &mfmr_wrap->ibfmr is @ibfmr itself, so read the device from it. */
	struct mlx4_ib_dev *ibdev = to_mdev(ibfmr->device);
	int ret;

	ret = mlx4_map_phys_fmr(ibdev->dev, &mfmr_wrap->mfmr, page_list,
				npages, iova,
				&mfmr_wrap->ibfmr.lkey,
				&mfmr_wrap->ibfmr.rkey);
	return ret;
}
/*
 * Unmap every FMR on @fmr_list.
 *
 * All FMRs on the list must belong to the same mlx4 device; this is
 * verified in a first pass, before anything is unmapped, and -EINVAL is
 * returned if two entries disagree.  An empty list is a successful no-op.
 *
 * Returns 0 otherwise.  A SYNC_TPT failure is only logged with a warning;
 * it is not reported to the caller.
 */
int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
{
	struct mlx4_dev *mdev = NULL;
	struct ib_fmr *cur;
	int ret;

	/* Pass 1: make sure every entry sits on one and the same device. */
	list_for_each_entry(cur, fmr_list, list) {
		struct mlx4_dev *owner = to_mdev(cur->device)->dev;

		if (!mdev)
			mdev = owner;
		else if (owner != mdev)
			return -EINVAL;
	}

	if (!mdev)
		return 0;

	/* Pass 2: unmap each FMR, letting the core update its keys. */
	list_for_each_entry(cur, fmr_list, list) {
		struct mlx4_ib_fmr *wrap = to_mfmr(cur);

		mlx4_fmr_unmap(mdev, &wrap->mfmr,
			       &wrap->ibfmr.lkey, &wrap->ibfmr.rkey);
	}

	/*
	 * Make sure all MPT status updates are visible before issuing
	 * SYNC_TPT firmware command.
	 */
	wmb();

	ret = mlx4_SYNC_TPT(mdev);
	if (ret)
		printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when "
		       "unmapping FMRs\n", ret);

	return 0;
}
/*
 * Release an FMR.
 *
 * The wrapper is freed only after mlx4_fmr_free() succeeds.  If that call
 * fails, its error is returned and the FMR is left allocated.
 *
 * Returns 0 on success or the error from mlx4_fmr_free().
 */
int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
{
	struct mlx4_ib_fmr *wrap = to_mfmr(ibfmr);
	struct mlx4_ib_dev *ibdev = to_mdev(ibfmr->device);
	int ret = mlx4_fmr_free(ibdev->dev, &wrap->mfmr);

	if (ret)
		return ret;

	kfree(wrap);
	return 0;
}
......@@ -1249,6 +1249,13 @@ static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->byte_count = cpu_to_be32(sg->length);
}
/*
 * Fill WQE data segment @dseg from scatter/gather entry @sg, converting
 * the length and lkey to big-endian 32-bit values and the address to a
 * big-endian 64-bit value.  mlx4_ib_post_recv() calls this once for each
 * entry of a receive work request's sg_list.
 */
static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
{
dseg->byte_count = cpu_to_be32(sg->length);
dseg->lkey = cpu_to_be32(sg->lkey);
dseg->addr = cpu_to_be64(sg->addr);
}
int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
......@@ -1464,11 +1471,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
scat = get_recv_wqe(qp, ind);
for (i = 0; i < wr->num_sge; ++i) {
scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
}
for (i = 0; i < wr->num_sge; ++i)
__set_data_seg(scat + i, wr->sg_list + i);
if (i < qp->rq.max_gs) {
scat[i].byte_count = 0;
......
......@@ -251,7 +251,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
if (ret)
return ret;
srq_attr->srq_limit = be16_to_cpu(limit_watermark);
srq_attr->srq_limit = limit_watermark;
srq_attr->max_wr = srq->msrq.max - 1;
srq_attr->max_sge = srq->msrq.max_gs;
......
......@@ -290,6 +290,12 @@ static int mthca_cmd_post(struct mthca_dev *dev,
err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier,
op_modifier, op, token, event);
/*
* Make sure that our HCR writes don't get mixed in with
* writes from another CPU starting a FW command.
*/
mmiowb();
mutex_unlock(&dev->cmd.hcr_mutex);
return err;
}
......
......@@ -83,7 +83,7 @@ enum {
MTHCA_QP_CONTEXT_SIZE = 0x200,
MTHCA_RDB_ENTRY_SIZE = 0x20,
MTHCA_AV_SIZE = 0x20,
MTHCA_MGM_ENTRY_SIZE = 0x40,
MTHCA_MGM_ENTRY_SIZE = 0x100,
/* Arbel FW gives us these, but we need them for Tavor */
MTHCA_MPT_ENTRY_SIZE = 0x40,
......
......@@ -553,6 +553,14 @@ void ipoib_drain_cq(struct net_device *dev)
do {
n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
for (i = 0; i < n; ++i) {
/*
* Convert any successful completions to flush
* errors to avoid passing packets up the
* stack after bringing the device down.
*/
if (priv->ibwc[i].status == IB_WC_SUCCESS)
priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册