提交 23790ba2 编写于 作者: D Doug Ledford

Merge branch 'k.o/for-4.12' into k.o/for-4.12-rdma-netdevice

......@@ -29,4 +29,5 @@ ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o
/*
* Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <linux/rcupdate.h>
#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"
void uverbs_uobject_get(struct ib_uobject *uobject)
{
kref_get(&uobject->ref);
}
static void uverbs_uobject_free(struct kref *ref)
{
struct ib_uobject *uobj =
container_of(ref, struct ib_uobject, ref);
if (uobj->type->type_class->needs_kfree_rcu)
kfree_rcu(uobj, rcu);
else
kfree(uobj);
}
void uverbs_uobject_put(struct ib_uobject *uobject)
{
kref_put(&uobject->ref, uverbs_uobject_free);
}
static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
{
/*
* When a shared access is required, we use a positive counter. Each
* shared access request checks that the value != -1 and increment it.
* Exclusive access is required for operations like write or destroy.
* In exclusive access mode, we check that the counter is zero (nobody
* claimed this object) and we set it to -1. Releasing a shared access
* lock is done simply by decreasing the counter. As for exclusive
* access locks, since only a single one of them is is allowed
* concurrently, setting the counter to zero is enough for releasing
* this lock.
*/
if (!exclusive)
return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
-EBUSY : 0;
/* lock is either WRITE or DESTROY - should be exclusive */
return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
}
static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
const struct uverbs_obj_type *type)
{
struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
if (!uobj)
return ERR_PTR(-ENOMEM);
/*
* user_handle should be filled by the handler,
* The object is added to the list in the commit stage.
*/
uobj->context = context;
uobj->type = type;
atomic_set(&uobj->usecnt, 0);
kref_init(&uobj->ref);
return uobj;
}
static int idr_add_uobj(struct ib_uobject *uobj)
{
int ret;
idr_preload(GFP_KERNEL);
spin_lock(&uobj->context->ufile->idr_lock);
/*
* We start with allocating an idr pointing to NULL. This represents an
* object which isn't initialized yet. We'll replace it later on with
* the real object once we commit.
*/
ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
if (ret >= 0)
uobj->id = ret;
spin_unlock(&uobj->context->ufile->idr_lock);
idr_preload_end();
return ret < 0 ? ret : 0;
}
/*
* It only removes it from the uobjects list, uverbs_uobject_put() is still
* required.
*/
static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
{
spin_lock(&uobj->context->ufile->idr_lock);
idr_remove(&uobj->context->ufile->idr, uobj->id);
spin_unlock(&uobj->context->ufile->idr_lock);
}
/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext,
int id, bool exclusive)
{
struct ib_uobject *uobj;
rcu_read_lock();
/* object won't be released as we're protected in rcu */
uobj = idr_find(&ucontext->ufile->idr, id);
if (!uobj) {
uobj = ERR_PTR(-ENOENT);
goto free;
}
uverbs_uobject_get(uobj);
free:
rcu_read_unlock();
return uobj;
}
static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext,
int id, bool exclusive)
{
struct file *f;
struct ib_uobject *uobject;
const struct uverbs_obj_fd_type *fd_type =
container_of(type, struct uverbs_obj_fd_type, type);
if (exclusive)
return ERR_PTR(-EOPNOTSUPP);
f = fget(id);
if (!f)
return ERR_PTR(-EBADF);
uobject = f->private_data;
/*
* fget(id) ensures we are not currently running uverbs_close_fd,
* and the caller is expected to ensure that uverbs_close_fd is never
* done while a call top lookup is possible.
*/
if (f->f_op != fd_type->fops) {
fput(f);
return ERR_PTR(-EBADF);
}
uverbs_uobject_get(uobject);
return uobject;
}
struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext,
int id, bool exclusive)
{
struct ib_uobject *uobj;
int ret;
uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
if (IS_ERR(uobj))
return uobj;
if (uobj->type != type) {
ret = -EINVAL;
goto free;
}
ret = uverbs_try_lock_object(uobj, exclusive);
if (ret) {
WARN(ucontext->cleanup_reason,
"ib_uverbs: Trying to lookup_get while cleanup context\n");
goto free;
}
return uobj;
free:
uobj->type->type_class->lookup_put(uobj, exclusive);
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
}
static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext)
{
int ret;
struct ib_uobject *uobj;
uobj = alloc_uobj(ucontext, type);
if (IS_ERR(uobj))
return uobj;
ret = idr_add_uobj(uobj);
if (ret)
goto uobj_put;
ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
RDMACG_RESOURCE_HCA_OBJECT);
if (ret)
goto idr_remove;
return uobj;
idr_remove:
uverbs_idr_remove_uobj(uobj);
uobj_put:
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
}
static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext)
{
const struct uverbs_obj_fd_type *fd_type =
container_of(type, struct uverbs_obj_fd_type, type);
int new_fd;
struct ib_uobject *uobj;
struct ib_uobject_file *uobj_file;
struct file *filp;
new_fd = get_unused_fd_flags(O_CLOEXEC);
if (new_fd < 0)
return ERR_PTR(new_fd);
uobj = alloc_uobj(ucontext, type);
if (IS_ERR(uobj)) {
put_unused_fd(new_fd);
return uobj;
}
uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
filp = anon_inode_getfile(fd_type->name,
fd_type->fops,
uobj_file,
fd_type->flags);
if (IS_ERR(filp)) {
put_unused_fd(new_fd);
uverbs_uobject_put(uobj);
return (void *)filp;
}
uobj_file->uobj.id = new_fd;
uobj_file->uobj.object = filp;
uobj_file->ufile = ucontext->ufile;
INIT_LIST_HEAD(&uobj->list);
kref_get(&uobj_file->ufile->ref);
return uobj;
}
struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
struct ib_ucontext *ucontext)
{
return type->type_class->alloc_begin(type, ucontext);
}
static void uverbs_uobject_add(struct ib_uobject *uobject)
{
mutex_lock(&uobject->context->uobjects_lock);
list_add(&uobject->list, &uobject->context->uobjects);
mutex_unlock(&uobject->context->uobjects_lock);
}
static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
const struct uverbs_obj_idr_type *idr_type =
container_of(uobj->type, struct uverbs_obj_idr_type,
type);
int ret = idr_type->destroy_object(uobj, why);
/*
* We can only fail gracefully if the user requested to destroy the
* object. In the rest of the cases, just remove whatever you can.
*/
if (why == RDMA_REMOVE_DESTROY && ret)
return ret;
ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
RDMACG_RESOURCE_HCA_OBJECT);
uverbs_idr_remove_uobj(uobj);
return ret;
}
static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
{
struct ib_uobject_file *uobj_file =
container_of(uobj, struct ib_uobject_file, uobj);
struct file *filp = uobj->object;
int id = uobj_file->uobj.id;
/* Unsuccessful NEW */
fput(filp);
put_unused_fd(id);
}
static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
const struct uverbs_obj_fd_type *fd_type =
container_of(uobj->type, struct uverbs_obj_fd_type, type);
struct ib_uobject_file *uobj_file =
container_of(uobj, struct ib_uobject_file, uobj);
int ret = fd_type->context_closed(uobj_file, why);
if (why == RDMA_REMOVE_DESTROY && ret)
return ret;
if (why == RDMA_REMOVE_DURING_CLEANUP) {
alloc_abort_fd_uobject(uobj);
return ret;
}
uobj_file->uobj.context = NULL;
return ret;
}
static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
{
#ifdef CONFIG_LOCKDEP
if (exclusive)
WARN_ON(atomic_read(&uobj->usecnt) > 0);
else
WARN_ON(atomic_read(&uobj->usecnt) == -1);
#endif
}
static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
int ret;
struct ib_ucontext *ucontext = uobj->context;
ret = uobj->type->type_class->remove_commit(uobj, why);
if (ret && why == RDMA_REMOVE_DESTROY) {
/* We couldn't remove the object, so just unlock the uobject */
atomic_set(&uobj->usecnt, 0);
uobj->type->type_class->lookup_put(uobj, true);
} else {
mutex_lock(&ucontext->uobjects_lock);
list_del(&uobj->list);
mutex_unlock(&ucontext->uobjects_lock);
/* put the ref we took when we created the object */
uverbs_uobject_put(uobj);
}
return ret;
}
/* This is called only for user requested DESTROY reasons */
int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
{
int ret;
struct ib_ucontext *ucontext = uobj->context;
/* put the ref count we took at lookup_get */
uverbs_uobject_put(uobj);
/* Cleanup is running. Calling this should have been impossible */
if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
return 0;
}
lockdep_check(uobj, true);
ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
up_read(&ucontext->cleanup_rwsem);
return ret;
}
static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
uverbs_uobject_add(uobj);
spin_lock(&uobj->context->ufile->idr_lock);
/*
* We already allocated this IDR with a NULL object, so
* this shouldn't fail.
*/
WARN_ON(idr_replace(&uobj->context->ufile->idr,
uobj, uobj->id));
spin_unlock(&uobj->context->ufile->idr_lock);
}
static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
struct ib_uobject_file *uobj_file =
container_of(uobj, struct ib_uobject_file, uobj);
uverbs_uobject_add(&uobj_file->uobj);
fd_install(uobj_file->uobj.id, uobj->object);
/* This shouldn't be used anymore. Use the file object instead */
uobj_file->uobj.id = 0;
/* Get another reference as we export this to the fops */
uverbs_uobject_get(&uobj_file->uobj);
}
int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
{
/* Cleanup is running. Calling this should have been impossible */
if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
int ret;
WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
ret = uobj->type->type_class->remove_commit(uobj,
RDMA_REMOVE_DURING_CLEANUP);
if (ret)
pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
uobj->id);
return ret;
}
uobj->type->type_class->alloc_commit(uobj);
up_read(&uobj->context->cleanup_rwsem);
return 0;
}
static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
{
uverbs_idr_remove_uobj(uobj);
ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
RDMACG_RESOURCE_HCA_OBJECT);
uverbs_uobject_put(uobj);
}
void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
{
uobj->type->type_class->alloc_abort(uobj);
}
static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
{
}
static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
{
struct file *filp = uobj->object;
WARN_ON(exclusive);
/* This indirectly calls uverbs_close_fd and free the object */
fput(filp);
}
void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
{
lockdep_check(uobj, exclusive);
uobj->type->type_class->lookup_put(uobj, exclusive);
/*
* In order to unlock an object, either decrease its usecnt for
* read access or zero it in case of exclusive access. See
* uverbs_try_lock_object for locking schema information.
*/
if (!exclusive)
atomic_dec(&uobj->usecnt);
else
atomic_set(&uobj->usecnt, 0);
uverbs_uobject_put(uobj);
}
const struct uverbs_obj_type_class uverbs_idr_class = {
.alloc_begin = alloc_begin_idr_uobject,
.lookup_get = lookup_get_idr_uobject,
.alloc_commit = alloc_commit_idr_uobject,
.alloc_abort = alloc_abort_idr_uobject,
.lookup_put = lookup_put_idr_uobject,
.remove_commit = remove_commit_idr_uobject,
/*
* When we destroy an object, we first just lock it for WRITE and
* actually DESTROY it in the finalize stage. So, the problematic
* scenario is when we just started the finalize stage of the
* destruction (nothing was executed yet). Now, the other thread
* fetched the object for READ access, but it didn't lock it yet.
* The DESTROY thread continues and starts destroying the object.
* When the other thread continue - without the RCU, it would
* access freed memory. However, the rcu_read_lock delays the free
* until the rcu_read_lock of the READ operation quits. Since the
* exclusive lock of the object is still taken by the DESTROY flow, the
* READ operation will get -EBUSY and it'll just bail out.
*/
.needs_kfree_rcu = true,
};
static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
{
struct ib_ucontext *ucontext;
struct ib_uverbs_file *ufile = uobj_file->ufile;
int ret;
mutex_lock(&uobj_file->ufile->cleanup_mutex);
/* uobject was either already cleaned up or is cleaned up right now anyway */
if (!uobj_file->uobj.context ||
!down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
goto unlock;
ucontext = uobj_file->uobj.context;
ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
up_read(&ucontext->cleanup_rwsem);
if (ret)
pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
unlock:
mutex_unlock(&ufile->cleanup_mutex);
}
void uverbs_close_fd(struct file *f)
{
struct ib_uobject_file *uobj_file = f->private_data;
struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
_uverbs_close_fd(uobj_file);
uverbs_uobject_put(&uobj_file->uobj);
kref_put(uverbs_file_ref, ib_uverbs_release_file);
}
void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
{
enum rdma_remove_reason reason = device_removed ?
RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
unsigned int cur_order = 0;
ucontext->cleanup_reason = reason;
/*
* Waits for all remove_commit and alloc_commit to finish. Logically, We
* want to hold this forever as the context is going to be destroyed,
* but we'll release it since it causes a "held lock freed" BUG message.
*/
down_write(&ucontext->cleanup_rwsem);
while (!list_empty(&ucontext->uobjects)) {
struct ib_uobject *obj, *next_obj;
unsigned int next_order = UINT_MAX;
/*
* This shouldn't run while executing other commands on this
* context. Thus, the only thing we should take care of is
* releasing a FD while traversing this list. The FD could be
* closed and released from the _release fop of this FD.
* In order to mitigate this, we add a lock.
* We take and release the lock per order traversal in order
* to let other threads (which might still use the FDs) chance
* to run.
*/
mutex_lock(&ucontext->uobjects_lock);
list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
list) {
if (obj->type->destroy_order == cur_order) {
int ret;
/*
* if we hit this WARN_ON, that means we are
* racing with a lookup_get.
*/
WARN_ON(uverbs_try_lock_object(obj, true));
ret = obj->type->type_class->remove_commit(obj,
reason);
list_del(&obj->list);
if (ret)
pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
obj->id, cur_order);
/* put the ref we took when we created the object */
uverbs_uobject_put(obj);
} else {
next_order = min(next_order,
obj->type->destroy_order);
}
}
mutex_unlock(&ucontext->uobjects_lock);
cur_order = next_order;
}
up_write(&ucontext->cleanup_rwsem);
}
void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
{
ucontext->cleanup_reason = 0;
mutex_init(&ucontext->uobjects_lock);
INIT_LIST_HEAD(&ucontext->uobjects);
init_rwsem(&ucontext->cleanup_rwsem);
}
const struct uverbs_obj_type_class uverbs_fd_class = {
.alloc_begin = alloc_begin_fd_uobject,
.lookup_get = lookup_get_fd_uobject,
.alloc_commit = alloc_commit_fd_uobject,
.alloc_abort = alloc_abort_fd_uobject,
.lookup_put = lookup_put_fd_uobject,
.remove_commit = remove_commit_fd_uobject,
.needs_kfree_rcu = false,
};
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef RDMA_CORE_H
#define RDMA_CORE_H
#include <linux/idr.h>
#include <rdma/uverbs_types.h>
#include <rdma/ib_verbs.h>
#include <linux/mutex.h>
/*
* These functions initialize the context and cleanups its uobjects.
* The context has a list of objects which is protected by a mutex
* on the context. initialize_ucontext should be called when we create
* a context.
* cleanup_ucontext removes all uobjects from the context and puts them.
*/
void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
/*
* uverbs_uobject_get is called in order to increase the reference count on
* an uobject. This is useful when a handler wants to keep the uobject's memory
* alive, regardless if this uobject is still alive in the context's objects
* repository. Objects are put via uverbs_uobject_put.
*/
void uverbs_uobject_get(struct ib_uobject *uobject);
/*
* In order to indicate we no longer needs this uobject, uverbs_uobject_put
* is called. When the reference count is decreased, the uobject is freed.
* For example, this is used when attaching a completion channel to a CQ.
*/
void uverbs_uobject_put(struct ib_uobject *uobject);
/* Indicate this fd is no longer used by this consumer, but its memory isn't
* necessarily released yet. When the last reference is put, we release the
* memory. After this call is executed, calling uverbs_uobject_get isn't
* allowed.
* This must be called from the release file_operations of the file!
*/
void uverbs_close_fd(struct file *f);
#endif /* RDMA_CORE_H */
......@@ -76,12 +76,13 @@
* an asynchronous event queue file is created and released when the
* event file is closed.
*
* struct ib_uverbs_event_file: One reference is held by the VFS and
* released when the file is closed. For asynchronous event files,
* another reference is held by the corresponding main context file
* and released when that file is closed. For completion event files,
* a reference is taken when a CQ is created that uses the file, and
* released when the CQ is destroyed.
* struct ib_uverbs_event_queue: Base structure for
* struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file.
* One reference is held by the VFS and released when the file is closed.
* For asynchronous event files, another reference is held by the corresponding
* main context file and released when that file is closed. For completion
* event files, a reference is taken when a CQ is created that uses the file,
* and released when the CQ is destroyed.
*/
struct ib_uverbs_device {
......@@ -101,18 +102,26 @@ struct ib_uverbs_device {
struct list_head uverbs_events_file_list;
};
struct ib_uverbs_event_file {
struct kref ref;
int is_async;
struct ib_uverbs_file *uverbs_file;
struct ib_uverbs_event_queue {
spinlock_t lock;
int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
};
struct ib_uverbs_async_event_file {
struct ib_uverbs_event_queue ev_queue;
struct ib_uverbs_file *uverbs_file;
struct kref ref;
struct list_head list;
};
struct ib_uverbs_completion_event_file {
struct ib_uobject_file uobj_file;
struct ib_uverbs_event_queue ev_queue;
};
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
......@@ -120,9 +129,13 @@ struct ib_uverbs_file {
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
struct ib_uverbs_event_file *async_file;
struct ib_uverbs_async_event_file *async_file;
struct list_head list;
int is_closed;
struct idr idr;
/* spinlock protects write access to idr */
spinlock_t idr_lock;
};
struct ib_uverbs_event {
......@@ -159,6 +172,8 @@ struct ib_usrq_object {
struct ib_uqp_object {
struct ib_uevent_object uevent;
/* lock for mcast list */
struct mutex mcast_lock;
struct list_head mcast_list;
struct ib_uxrcd_object *uxrcd;
};
......@@ -176,32 +191,18 @@ struct ib_ucq_object {
u32 async_events_reported;
};
extern spinlock_t ib_uverbs_idr_lock;
extern struct idr ib_uverbs_pd_idr;
extern struct idr ib_uverbs_mr_idr;
extern struct idr ib_uverbs_mw_idr;
extern struct idr ib_uverbs_ah_idr;
extern struct idr ib_uverbs_cq_idr;
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
extern struct idr ib_uverbs_xrcd_idr;
extern struct idr ib_uverbs_rule_idr;
extern struct idr ib_uverbs_wq_idr;
extern struct idr ib_uverbs_rwq_ind_tbl_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_device *ib_dev,
int is_async);
extern const struct file_operations uverbs_event_fops;
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_device *ib_dev);
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
struct ib_uverbs_event_file *ev_file,
struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
struct ib_uevent_object *uobj);
void ib_uverbs_release_file(struct kref *ref);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
......@@ -210,9 +211,12 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
enum rdma_remove_reason why);
int uverbs_dealloc_mw(struct ib_mw *mw);
void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj);
struct ib_uverbs_flow_spec {
union {
......
/*
* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/uverbs_std_types.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <linux/bug.h>
#include <linux/file.h>
#include "rdma_core.h"
#include "uverbs.h"
int uverbs_free_ah(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
return ib_destroy_ah((struct ib_ah *)uobject->object);
}
int uverbs_free_flow(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
return ib_destroy_flow((struct ib_flow *)uobject->object);
}
int uverbs_free_mw(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
}
int uverbs_free_qp(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_qp *qp = uobject->object;
struct ib_uqp_object *uqp =
container_of(uobject, struct ib_uqp_object, uevent.uobject);
int ret;
if (why == RDMA_REMOVE_DESTROY) {
if (!list_empty(&uqp->mcast_list))
return -EBUSY;
} else if (qp == qp->real_qp) {
ib_uverbs_detach_umcast(qp, uqp);
}
ret = ib_destroy_qp(qp);
if (ret && why == RDMA_REMOVE_DESTROY)
return ret;
if (uqp->uxrcd)
atomic_dec(&uqp->uxrcd->refcnt);
ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent);
return ret;
}
int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
int ret;
ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
if (!ret || why != RDMA_REMOVE_DESTROY)
kfree(ind_tbl);
return ret;
}
int uverbs_free_wq(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_wq *wq = uobject->object;
struct ib_uwq_object *uwq =
container_of(uobject, struct ib_uwq_object, uevent.uobject);
int ret;
ret = ib_destroy_wq(wq);
if (!ret || why != RDMA_REMOVE_DESTROY)
ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
return ret;
}
int uverbs_free_srq(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_srq *srq = uobject->object;
struct ib_uevent_object *uevent =
container_of(uobject, struct ib_uevent_object, uobject);
enum ib_srq_type srq_type = srq->srq_type;
int ret;
ret = ib_destroy_srq(srq);
if (ret && why == RDMA_REMOVE_DESTROY)
return ret;
if (srq_type == IB_SRQT_XRC) {
struct ib_usrq_object *us =
container_of(uevent, struct ib_usrq_object, uevent);
atomic_dec(&us->uxrcd->refcnt);
}
ib_uverbs_release_uevent(uobject->context->ufile, uevent);
return ret;
}
int uverbs_free_cq(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_cq *cq = uobject->object;
struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
struct ib_ucq_object *ucq =
container_of(uobject, struct ib_ucq_object, uobject);
int ret;
ret = ib_destroy_cq(cq);
if (!ret || why != RDMA_REMOVE_DESTROY)
ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
container_of(ev_queue,
struct ib_uverbs_completion_event_file,
ev_queue) : NULL,
ucq);
return ret;
}
int uverbs_free_mr(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
return ib_dereg_mr((struct ib_mr *)uobject->object);
}
int uverbs_free_xrcd(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_xrcd *xrcd = uobject->object;
struct ib_uxrcd_object *uxrcd =
container_of(uobject, struct ib_uxrcd_object, uobject);
int ret;
mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
ret = -EBUSY;
else
ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
xrcd, why);
mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
return ret;
}
int uverbs_free_pd(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_pd *pd = uobject->object;
if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
return -EBUSY;
ib_dealloc_pd((struct ib_pd *)uobject->object);
return 0;
}
int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
enum rdma_remove_reason why)
{
struct ib_uverbs_completion_event_file *comp_event_file =
container_of(uobj_file, struct ib_uverbs_completion_event_file,
uobj_file);
struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
spin_lock_irq(&event_queue->lock);
event_queue->is_closed = 1;
spin_unlock_irq(&event_queue->lock);
if (why == RDMA_REMOVE_DRIVER_REMOVE) {
wake_up_interruptible(&event_queue->poll_wait);
kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
}
return 0;
};
const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
.type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
.context_closed = uverbs_hot_unplug_completion_event_file,
.fops = &uverbs_event_fops,
.name = "[infinibandevent]",
.flags = O_RDONLY,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_cq = {
.type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0),
.destroy_object = uverbs_free_cq,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_qp = {
.type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0),
.destroy_object = uverbs_free_qp,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_mw = {
.type = UVERBS_TYPE_ALLOC_IDR(0),
.destroy_object = uverbs_free_mw,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_mr = {
/* 1 is used in order to free the MR after all the MWs */
.type = UVERBS_TYPE_ALLOC_IDR(1),
.destroy_object = uverbs_free_mr,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_srq = {
.type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0),
.destroy_object = uverbs_free_srq,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_ah = {
.type = UVERBS_TYPE_ALLOC_IDR(0),
.destroy_object = uverbs_free_ah,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_flow = {
.type = UVERBS_TYPE_ALLOC_IDR(0),
.destroy_object = uverbs_free_flow,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_wq = {
.type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0),
.destroy_object = uverbs_free_wq,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table = {
.type = UVERBS_TYPE_ALLOC_IDR(0),
.destroy_object = uverbs_free_rwq_ind_tbl,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd = {
.type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0),
.destroy_object = uverbs_free_xrcd,
};
const struct uverbs_obj_idr_type uverbs_type_attrs_pd = {
/* 2 is used in order to free the PD after MRs */
.type = UVERBS_TYPE_ALLOC_IDR(2),
.destroy_object = uverbs_free_pd,
};
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
......@@ -64,6 +64,7 @@
#include "platform.h"
#include "aspm.h"
#include "affinity.h"
#include "debugfs.h"
#define NUM_IB_PORTS 1
......@@ -1045,6 +1046,7 @@ static void dc_start(struct hfi1_devdata *);
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
/*
* Error interrupt table entry. This is used as input to the interrupt
......@@ -7165,7 +7167,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
* set the max_rate field in handle_verify_cap until v0.19.
*/
if ((dd->icode == ICODE_RTL_SILICON) &&
(dd->dc8051_ver < dc8051_ver(0, 19))) {
(dd->dc8051_ver < dc8051_ver(0, 19, 0))) {
/* max_rate: 0 = 12.5G, 1 = 25G */
switch (max_rate) {
case 0:
......@@ -7350,7 +7352,7 @@ void handle_verify_cap(struct work_struct *work)
}
ppd->link_speed_active = 0; /* invalid value */
if (dd->dc8051_ver < dc8051_ver(0, 20)) {
if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
switch (remote_tx_rate) {
case 0:
......@@ -7897,6 +7899,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
}
if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
/* report any remaining errors */
if (reg)
dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
......@@ -8343,6 +8348,52 @@ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
return 0;
}
/*
* Provide a cache for some of the LCB registers in case the LCB is
* unavailable.
* (The LCB is unavailable in certain link states, for example.)
*/
struct lcb_datum {
u32 off;
u64 val;
};
static struct lcb_datum lcb_cache[] = {
{ DC_LCB_ERR_INFO_RX_REPLAY_CNT, 0},
{ DC_LCB_ERR_INFO_SEQ_CRC_CNT, 0 },
{ DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT, 0 },
};
static void update_lcb_cache(struct hfi1_devdata *dd)
{
int i;
int ret;
u64 val;
for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
ret = read_lcb_csr(dd, lcb_cache[i].off, &val);
/* Update if we get good data */
if (likely(ret != -EBUSY))
lcb_cache[i].val = val;
}
}
static int read_lcb_cache(u32 off, u64 *val)
{
int i;
for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
if (lcb_cache[i].off == off) {
*val = lcb_cache[i].val;
return 0;
}
}
pr_warn("%s bad offset 0x%x\n", __func__, off);
return -1;
}
/*
* Read an LCB CSR. Access may not be in host control, so check.
* Return 0 on success, -EBUSY on failure.
......@@ -8354,9 +8405,13 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
/* if up, go through the 8051 for the value */
if (ppd->host_link_state & HLS_UP)
return read_lcb_via_8051(dd, addr, data);
/* if going up or down, no access */
if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
return -EBUSY;
/* if going up or down, check the cache, otherwise, no access */
if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) {
if (read_lcb_cache(addr, data))
return -EBUSY;
return 0;
}
/* otherwise, host has access */
*data = read_csr(dd, addr);
return 0;
......@@ -8371,7 +8426,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
int ret;
if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR ||
(dd->dc8051_ver < dc8051_ver(0, 20))) {
(dd->dc8051_ver < dc8051_ver(0, 20, 0))) {
if (acquire_lcb_access(dd, 0) == 0) {
write_csr(dd, addr, data);
release_lcb_access(dd, 0);
......@@ -8677,13 +8732,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
& REMOTE_DEVICE_REV_MASK;
}
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
u8 *ver_patch)
{
u32 frame;
read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
*ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
*ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
*ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) &
STS_FM_VERSION_MAJOR_MASK;
*ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) &
STS_FM_VERSION_MINOR_MASK;
read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame);
*ver_patch = (frame >> STS_FM_VERSION_PATCH_SHIFT) &
STS_FM_VERSION_PATCH_MASK;
}
static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
......@@ -8891,8 +8953,6 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message)
*/
static int do_quick_linkup(struct hfi1_devdata *dd)
{
u64 reg;
unsigned long timeout;
int ret;
lcb_shutdown(dd, 0);
......@@ -8915,19 +8975,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_RUN,
1ull << DC_LCB_CFG_RUN_EN_SHIFT);
/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
timeout = jiffies + msecs_to_jiffies(10);
while (1) {
reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
if (reg)
break;
if (time_after(jiffies, timeout)) {
dd_dev_err(dd,
"timeout waiting for LINK_TRANSFER_ACTIVE\n");
return -ETIMEDOUT;
}
udelay(2);
}
ret = wait_link_transfer_active(dd, 10);
if (ret)
return ret;
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
......@@ -9091,7 +9141,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret)
goto set_local_link_attributes_fail;
if (dd->dc8051_ver < dc8051_ver(0, 20)) {
if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* set the tx rate to the fastest enabled */
if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
ppd->local_tx_rate = 1;
......@@ -9494,8 +9544,11 @@ static int test_qsfp_read(struct hfi1_pportdata *ppd)
int ret;
u8 status;
/* report success if not a QSFP */
if (ppd->port_type != PORT_TYPE_QSFP)
/*
* Report success if not a QSFP or, if it is a QSFP, but the cable is
* not present
*/
if (ppd->port_type != PORT_TYPE_QSFP || !qsfp_mod_present(ppd))
return 0;
/* read byte 2, the status byte */
......@@ -10082,6 +10135,64 @@ static void check_lni_states(struct hfi1_pportdata *ppd)
decode_state_complete(ppd, last_remote_state, "received");
}
/* wait for wait_ms for LINK_TRANSFER_ACTIVE to go to 1 */
static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms)
{
u64 reg;
unsigned long timeout;
/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
timeout = jiffies + msecs_to_jiffies(wait_ms);
while (1) {
reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
if (reg)
break;
if (time_after(jiffies, timeout)) {
dd_dev_err(dd,
"timeout waiting for LINK_TRANSFER_ACTIVE\n");
return -ETIMEDOUT;
}
udelay(2);
}
return 0;
}
/* called when the logical link state is not down as it should be */
static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
/*
* Bring link up in LCB loopback
*/
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
write_csr(dd, DC_LCB_CFG_REINIT_AS_SLAVE, 0);
write_csr(dd, DC_LCB_CFG_CNT_FOR_SKIP_STALL, 0x110);
write_csr(dd, DC_LCB_CFG_LOOPBACK, 0x2);
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
(void)read_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET);
udelay(3);
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1);
write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
wait_link_transfer_active(dd, 100);
/*
* Bring the link down again.
*/
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0);
write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0);
/* call again to adjust ppd->statusp, if needed */
get_logical_state(ppd);
}
/*
* Helper for set_link_state(). Do not call except from that routine.
* Expects ppd->hls_mutex to be held.
......@@ -10098,6 +10209,8 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
int do_transition;
int do_wait;
update_lcb_cache(dd);
previous_state = ppd->host_link_state;
ppd->host_link_state = HLS_GOING_OFFLINE;
pstate = read_physical_state(dd);
......@@ -10135,15 +10248,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
return ret;
}
/* make sure the logical state is also down */
wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
/* make sure the logical state is also down */
ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
if (ret)
force_logical_link_state_down(ppd);
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
if (ppd->port_type == PORT_TYPE_QSFP &&
......
#ifndef _CHIP_H
#define _CHIP_H
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
......@@ -394,7 +394,8 @@
#define LAST_REMOTE_STATE_COMPLETE 0x13
#define LINK_QUALITY_INFO 0x14
#define REMOTE_DEVICE_ID 0x15
#define LINK_DOWN_REASON 0x16
#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */
#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */
/* 8051 lane specific register field IDs */
#define TX_EQ_SETTINGS 0x00
......@@ -524,10 +525,12 @@ enum {
#define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B)
/* misc status version fields */
#define STS_FM_VERSION_A_SHIFT 16
#define STS_FM_VERSION_A_MASK 0xff
#define STS_FM_VERSION_B_SHIFT 24
#define STS_FM_VERSION_B_MASK 0xff
#define STS_FM_VERSION_MINOR_SHIFT 16
#define STS_FM_VERSION_MINOR_MASK 0xff
#define STS_FM_VERSION_MAJOR_SHIFT 24
#define STS_FM_VERSION_MAJOR_MASK 0xff
#define STS_FM_VERSION_PATCH_SHIFT 24
#define STS_FM_VERSION_PATCH_MASK 0xff
/* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */
#define LCB_CRC_16B 0x0 /* 16b CRC */
......@@ -698,7 +701,8 @@ void fabric_serdes_reset(struct hfi1_devdata *dd);
int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
/* chip.c */
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b);
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
u8 *ver_patch);
void read_guid(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
......
......@@ -51,8 +51,12 @@
#include <linux/export.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ratelimit.h>
#include <linux/fault-inject.h>
#include "hfi.h"
#include "trace.h"
#include "debugfs.h"
#include "device.h"
#include "qp.h"
......@@ -1063,6 +1067,222 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
DEBUGFS_FILE_OPS(sdma_cpu_list);
#ifdef CONFIG_FAULT_INJECTION
static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
{
struct hfi1_opcode_stats_perctx *opstats;
if (*pos >= ARRAY_SIZE(opstats->stats))
return NULL;
return pos;
}
static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
struct hfi1_opcode_stats_perctx *opstats;
++*pos;
if (*pos >= ARRAY_SIZE(opstats->stats))
return NULL;
return pos;
}
static void _fault_stats_seq_stop(struct seq_file *s, void *v)
{
}
static int _fault_stats_seq_show(struct seq_file *s, void *v)
{
loff_t *spos = v;
loff_t i = *spos, j;
u64 n_packets = 0, n_bytes = 0;
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
for (j = 0; j < dd->first_user_ctxt; j++) {
if (!dd->rcd[j])
continue;
n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
}
if (!n_packets && !n_bytes)
return SEQ_SKIP;
if (!ibd->fault_opcode->n_rxfaults[i] &&
!ibd->fault_opcode->n_txfaults[i])
return SEQ_SKIP;
seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
(unsigned long long)n_packets,
(unsigned long long)n_bytes,
(unsigned long long)ibd->fault_opcode->n_rxfaults[i],
(unsigned long long)ibd->fault_opcode->n_txfaults[i]);
return 0;
}
DEBUGFS_SEQ_FILE_OPS(fault_stats);
DEBUGFS_SEQ_FILE_OPEN(fault_stats);
DEBUGFS_FILE_OPS(fault_stats);
static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
{
debugfs_remove_recursive(ibd->fault_opcode->dir);
kfree(ibd->fault_opcode);
ibd->fault_opcode = NULL;
}
static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
{
struct dentry *parent = ibd->hfi1_ibdev_dbg;
ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
if (!ibd->fault_opcode)
return -ENOMEM;
ibd->fault_opcode->attr.interval = 1;
ibd->fault_opcode->attr.require_end = ULONG_MAX;
ibd->fault_opcode->attr.stacktrace_depth = 32;
ibd->fault_opcode->attr.dname = NULL;
ibd->fault_opcode->attr.verbose = 0;
ibd->fault_opcode->fault_by_opcode = false;
ibd->fault_opcode->opcode = 0;
ibd->fault_opcode->mask = 0xff;
ibd->fault_opcode->dir =
fault_create_debugfs_attr("fault_opcode",
parent,
&ibd->fault_opcode->attr);
if (IS_ERR(ibd->fault_opcode->dir)) {
kfree(ibd->fault_opcode);
return -ENOENT;
}
DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
if (!debugfs_create_bool("fault_by_opcode", 0600,
ibd->fault_opcode->dir,
&ibd->fault_opcode->fault_by_opcode))
goto fail;
if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
&ibd->fault_opcode->opcode))
goto fail;
if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
&ibd->fault_opcode->mask))
goto fail;
return 0;
fail:
fault_exit_opcode_debugfs(ibd);
return -ENOMEM;
}
static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
{
debugfs_remove_recursive(ibd->fault_packet->dir);
kfree(ibd->fault_packet);
ibd->fault_packet = NULL;
}
static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
{
struct dentry *parent = ibd->hfi1_ibdev_dbg;
ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
if (!ibd->fault_packet)
return -ENOMEM;
ibd->fault_packet->attr.interval = 1;
ibd->fault_packet->attr.require_end = ULONG_MAX;
ibd->fault_packet->attr.stacktrace_depth = 32;
ibd->fault_packet->attr.dname = NULL;
ibd->fault_packet->attr.verbose = 0;
ibd->fault_packet->fault_by_packet = false;
ibd->fault_packet->dir =
fault_create_debugfs_attr("fault_packet",
parent,
&ibd->fault_opcode->attr);
if (IS_ERR(ibd->fault_packet->dir)) {
kfree(ibd->fault_packet);
return -ENOENT;
}
if (!debugfs_create_bool("fault_by_packet", 0600,
ibd->fault_packet->dir,
&ibd->fault_packet->fault_by_packet))
goto fail;
if (!debugfs_create_u64("fault_stats", 0400,
ibd->fault_packet->dir,
&ibd->fault_packet->n_faults))
goto fail;
return 0;
fail:
fault_exit_packet_debugfs(ibd);
return -ENOMEM;
}
static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
{
fault_exit_opcode_debugfs(ibd);
fault_exit_packet_debugfs(ibd);
}
static int fault_init_debugfs(struct hfi1_ibdev *ibd)
{
int ret = 0;
ret = fault_init_opcode_debugfs(ibd);
if (ret)
return ret;
ret = fault_init_packet_debugfs(ibd);
if (ret)
fault_exit_opcode_debugfs(ibd);
return ret;
}
bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
{
return ibd->fault_suppress_err;
}
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
{
bool ret = false;
struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
return false;
if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
return false;
ret = should_fail(&ibd->fault_opcode->attr, 1);
if (ret) {
trace_hfi1_fault_opcode(qp, opcode);
if (rx)
ibd->fault_opcode->n_rxfaults[opcode]++;
else
ibd->fault_opcode->n_txfaults[opcode]++;
}
return ret;
}
bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
bool ret = false;
if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
return false;
ret = should_fail(&ibd->fault_packet->attr, 1);
if (ret) {
++ibd->fault_packet->n_faults;
trace_hfi1_fault_packet(packet);
}
return ret;
}
#endif
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
......@@ -1112,12 +1332,22 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
!port_cntr_ops[i].ops.write ?
S_IRUGO : S_IRUGO | S_IWUSR);
}
#ifdef CONFIG_FAULT_INJECTION
debugfs_create_bool("fault_suppress_err", 0600,
ibd->hfi1_ibdev_dbg,
&ibd->fault_suppress_err);
fault_init_debugfs(ibd);
#endif
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
if (!hfi1_dbg_root)
goto out;
#ifdef CONFIG_FAULT_INJECTION
fault_exit_debugfs(ibd);
#endif
debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
......
......@@ -53,23 +53,79 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
void hfi1_dbg_init(void);
void hfi1_dbg_exit(void);
#ifdef CONFIG_FAULT_INJECTION
#include <linux/fault-inject.h>
struct fault_opcode {
struct fault_attr attr;
struct dentry *dir;
bool fault_by_opcode;
u64 n_rxfaults[256];
u64 n_txfaults[256];
u8 opcode;
u8 mask;
};
struct fault_packet {
struct fault_attr attr;
struct dentry *dir;
bool fault_by_packet;
u64 n_faults;
};
bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
#else
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
return false;
}
static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
u32 opcode, bool rx)
{
return false;
}
static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
{
return false;
}
#endif
#else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
}
static inline void hfi1_dbg_init(void)
{
}
void hfi1_dbg_init(void)
static inline void hfi1_dbg_exit(void)
{
}
void hfi1_dbg_exit(void)
static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
return false;
}
static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
u32 opcode, bool rx)
{
return false;
}
static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
{
return false;
}
#endif
#endif /* _HFI1_DEBUGFS_H */
......@@ -59,6 +59,7 @@
#include "trace.h"
#include "qp.h"
#include "sdma.h"
#include "debugfs.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
......@@ -1354,6 +1355,9 @@ void handle_eflags(struct hfi1_packet *packet)
*/
int process_receive_ib(struct hfi1_packet *packet)
{
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
packet->rcd->ctxt,
rhf_err_flags(packet->rhf),
......@@ -1363,6 +1367,11 @@ int process_receive_ib(struct hfi1_packet *packet)
packet->updegr,
rhf_egr_index(packet->rhf));
if (unlikely(
(hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
(packet->rhf & RHF_DC_ERR))))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
return RHF_RCV_CONTINUE;
......@@ -1398,6 +1407,12 @@ int process_receive_bypass(struct hfi1_packet *packet)
int process_receive_error(struct hfi1_packet *packet)
{
/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
if (unlikely(
hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
rhf_rcv_type_err(packet->rhf) == 3))
return RHF_RCV_CONTINUE;
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
......@@ -1409,6 +1424,8 @@ int process_receive_error(struct hfi1_packet *packet)
int kdeth_process_expected(struct hfi1_packet *packet)
{
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
......@@ -1421,6 +1438,8 @@ int kdeth_process_eager(struct hfi1_packet *packet)
{
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");
......
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
......@@ -1004,7 +1004,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
{
u64 reg;
int ret;
u8 ver_a, ver_b;
u8 ver_major;
u8 ver_minor;
u8 ver_patch;
/*
* DC Reset sequence
......@@ -1073,10 +1075,10 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
return -ETIMEDOUT;
}
read_misc_status(dd, &ver_a, &ver_b);
dd_dev_info(dd, "8051 firmware version %d.%d\n",
(int)ver_b, (int)ver_a);
dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
(int)ver_major, (int)ver_minor, (int)ver_patch);
dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
return 0;
}
......
......@@ -1020,7 +1020,7 @@ struct hfi1_devdata {
u8 qos_shift;
u16 irev; /* implementation revision */
u16 dc8051_ver; /* 8051 firmware version */
u32 dc8051_ver; /* 8051 firmware version */
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
......@@ -1167,15 +1167,16 @@ struct hfi1_devdata {
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
bool aspm_enabled; /* ASPM state: enabled/disabled */
struct rhashtable sdma_rht;
struct rhashtable *sdma_rht;
struct kobject kobj;
};
/* 8051 firmware version helper */
#define dc8051_ver(a, b) ((a) << 8 | (b))
#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
#define dc8051_ver_min(a) (a & 0x00ff)
#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16)
#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8)
#define dc8051_ver_patch(a) ((a) & 0x0000ff)
/* f_put_tid types */
#define PT_EXPECTED 0
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
/*
* Copyright(c) 2015, 2016 Intel Corporation.
* Copyright(c) 2015, 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
......@@ -104,11 +104,6 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_ack,
TP_ARGS(qp, psn)
);
DEFINE_EVENT(hfi1_rc_template, hfi1_timeout,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册