提交 482f0777 编写于 作者: F Felix Kuehling 提交者: Oded Gabbay

drm/amdkfd: Simplify event ID and signal slot management

Signal slots are identical to event IDs.

Replace the used_slot_bitmap and events hash table with an IDR to
allocate and lookup event IDs and signal slots more efficiently.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
上级 50cb7dd9
...@@ -41,24 +41,16 @@ struct kfd_event_waiter { ...@@ -41,24 +41,16 @@ struct kfd_event_waiter {
bool activated; /* Becomes true when event is signaled */ bool activated; /* Becomes true when event is signaled */
}; };
#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE)
/* /*
* Over-complicated pooled allocator for event notification slots.
*
* Each signal event needs a 64-bit signal slot where the signaler will write * Each signal event needs a 64-bit signal slot where the signaler will write
* a 1 before sending an interrupt.l (This is needed because some interrupts * a 1 before sending an interrupt. (This is needed because some interrupts
* do not contain enough spare data bits to identify an event.) * do not contain enough spare data bits to identify an event.)
* We get whole pages from vmalloc and map them to the process VA. * We get whole pages and map them to the process VA.
* Individual signal events are then allocated a slot in a page. * Individual signal events use their event_id as slot index.
*/ */
struct kfd_signal_page { struct kfd_signal_page {
uint64_t *kernel_address; uint64_t *kernel_address;
uint64_t __user *user_address; uint64_t __user *user_address;
unsigned int free_slots;
unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS];
}; };
/* /*
...@@ -73,34 +65,6 @@ static uint64_t *page_slots(struct kfd_signal_page *page) ...@@ -73,34 +65,6 @@ static uint64_t *page_slots(struct kfd_signal_page *page)
return page->kernel_address; return page->kernel_address;
} }
static bool allocate_free_slot(struct kfd_process *process,
unsigned int *out_slot_index)
{
struct kfd_signal_page *page = process->signal_page;
unsigned int slot;
if (!page || page->free_slots == 0) {
pr_debug("No free event signal slots were found for process %p\n",
process);
return false;
}
slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE);
__set_bit(slot, page->used_slot_bitmap);
page->free_slots--;
page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
*out_slot_index = slot;
pr_debug("Allocated event signal slot in page %p, slot %d\n",
page, slot);
return true;
}
static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
{ {
void *backing_store; void *backing_store;
...@@ -110,8 +74,6 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) ...@@ -110,8 +74,6 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
if (!page) if (!page)
return NULL; return NULL;
page->free_slots = SLOTS_PER_PAGE;
backing_store = (void *) __get_free_pages(GFP_KERNEL, backing_store = (void *) __get_free_pages(GFP_KERNEL,
get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
if (!backing_store) if (!backing_store)
...@@ -132,28 +94,26 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) ...@@ -132,28 +94,26 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
return NULL; return NULL;
} }
static bool allocate_event_notification_slot(struct kfd_process *p, static int allocate_event_notification_slot(struct kfd_process *p,
unsigned int *signal_slot_index) struct kfd_event *ev)
{ {
int id;
if (!p->signal_page) { if (!p->signal_page) {
p->signal_page = allocate_signal_page(p); p->signal_page = allocate_signal_page(p);
if (!p->signal_page) if (!p->signal_page)
return false; return -ENOMEM;
} }
return allocate_free_slot(p, signal_slot_index); id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT,
} GFP_KERNEL);
if (id < 0)
return id;
/* Assumes that the process's event_mutex is locked. */ ev->event_id = id;
static void release_event_notification_slot(struct kfd_signal_page *page, page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
size_t slot_index)
{
__clear_bit(slot_index, page->used_slot_bitmap);
page->free_slots++;
/* We don't free signal pages, they are retained by the process return 0;
* and reused until it exits.
*/
} }
/* /*
...@@ -162,89 +122,32 @@ static void release_event_notification_slot(struct kfd_signal_page *page, ...@@ -162,89 +122,32 @@ static void release_event_notification_slot(struct kfd_signal_page *page,
*/ */
static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
{ {
struct kfd_event *ev; return idr_find(&p->event_idr, id);
hash_for_each_possible(p->events, ev, events, id)
if (ev->event_id == id)
return ev;
return NULL;
}
/*
* Produce a kfd event id for a nonsignal event.
* These are arbitrary numbers, so we do a sequential search through
* the hash table for an unused number.
*/
static u32 make_nonsignal_event_id(struct kfd_process *p)
{
u32 id;
for (id = p->next_nonsignal_event_id;
id < KFD_LAST_NONSIGNAL_EVENT_ID &&
lookup_event_by_id(p, id);
id++)
;
if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
/*
* What if id == LAST_NONSIGNAL_EVENT_ID - 1?
* Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
* the first loop fails immediately and we proceed with the
* wraparound loop below.
*/
p->next_nonsignal_event_id = id + 1;
return id;
}
for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
id < KFD_LAST_NONSIGNAL_EVENT_ID &&
lookup_event_by_id(p, id);
id++)
;
if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
p->next_nonsignal_event_id = id + 1;
return id;
}
p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
return 0;
}
static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
unsigned int signal_slot)
{
return lookup_event_by_id(p, signal_slot);
} }
static int create_signal_event(struct file *devkfd, static int create_signal_event(struct file *devkfd,
struct kfd_process *p, struct kfd_process *p,
struct kfd_event *ev) struct kfd_event *ev)
{ {
int ret;
if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
if (!p->signal_event_limit_reached) { if (!p->signal_event_limit_reached) {
pr_warn("Signal event wasn't created because limit was reached\n"); pr_warn("Signal event wasn't created because limit was reached\n");
p->signal_event_limit_reached = true; p->signal_event_limit_reached = true;
} }
return -ENOMEM; return -ENOSPC;
} }
if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) { ret = allocate_event_notification_slot(p, ev);
if (ret) {
pr_warn("Signal event wasn't created because out of kernel memory\n"); pr_warn("Signal event wasn't created because out of kernel memory\n");
return -ENOMEM; return ret;
} }
p->signal_event_count++; p->signal_event_count++;
ev->user_signal_address = ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
&p->signal_page->user_address[ev->signal_slot_index];
ev->event_id = ev->signal_slot_index;
pr_debug("Signal event number %zu created with id %d, address %p\n", pr_debug("Signal event number %zu created with id %d, address %p\n",
p->signal_event_count, ev->event_id, p->signal_event_count, ev->event_id,
ev->user_signal_address); ev->user_signal_address);
...@@ -252,16 +155,20 @@ static int create_signal_event(struct file *devkfd, ...@@ -252,16 +155,20 @@ static int create_signal_event(struct file *devkfd,
return 0; return 0;
} }
/*
* No non-signal events are supported yet.
* We create them as events that never signal.
* Set event calls from user-mode are failed.
*/
static int create_other_event(struct kfd_process *p, struct kfd_event *ev) static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
{ {
ev->event_id = make_nonsignal_event_id(p); /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
if (ev->event_id == 0) * intentional integer overflow to -1 without a compiler
return -ENOMEM; * warning. idr_alloc treats a negative value as "maximum
* signed integer".
*/
int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
(uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
GFP_KERNEL);
if (id < 0)
return id;
ev->event_id = id;
return 0; return 0;
} }
...@@ -269,9 +176,8 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) ...@@ -269,9 +176,8 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
void kfd_event_init_process(struct kfd_process *p) void kfd_event_init_process(struct kfd_process *p)
{ {
mutex_init(&p->event_mutex); mutex_init(&p->event_mutex);
hash_init(p->events); idr_init(&p->event_idr);
p->signal_page = NULL; p->signal_page = NULL;
p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
p->signal_event_count = 0; p->signal_event_count = 0;
} }
...@@ -284,25 +190,22 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev) ...@@ -284,25 +190,22 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
waiter->event = NULL; waiter->event = NULL;
wake_up_all(&ev->wq); wake_up_all(&ev->wq);
if ((ev->type == KFD_EVENT_TYPE_SIGNAL || if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) { ev->type == KFD_EVENT_TYPE_DEBUG)
release_event_notification_slot(p->signal_page,
ev->signal_slot_index);
p->signal_event_count--; p->signal_event_count--;
}
hash_del(&ev->events); idr_remove(&p->event_idr, ev->event_id);
kfree(ev); kfree(ev);
} }
static void destroy_events(struct kfd_process *p) static void destroy_events(struct kfd_process *p)
{ {
struct kfd_event *ev; struct kfd_event *ev;
struct hlist_node *tmp; uint32_t id;
unsigned int hash_bkt;
hash_for_each_safe(p->events, hash_bkt, tmp, ev, events) idr_for_each_entry(&p->event_idr, ev, id)
destroy_event(p, ev); destroy_event(p, ev);
idr_destroy(&p->event_idr);
} }
/* /*
...@@ -365,7 +268,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ...@@ -365,7 +268,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
if (!ret) { if (!ret) {
*event_page_offset = KFD_MMAP_EVENTS_MASK; *event_page_offset = KFD_MMAP_EVENTS_MASK;
*event_page_offset <<= PAGE_SHIFT; *event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->signal_slot_index; *event_slot_index = ev->event_id;
} }
break; break;
default: default:
...@@ -374,8 +277,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ...@@ -374,8 +277,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
} }
if (!ret) { if (!ret) {
hash_add(p->events, &ev->events, ev->event_id);
*event_id = ev->event_id; *event_id = ev->event_id;
*event_trigger_data = ev->event_id; *event_trigger_data = ev->event_id;
} else { } else {
...@@ -469,17 +370,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) ...@@ -469,17 +370,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
{ {
page_slots(p->signal_page)[ev->signal_slot_index] = page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
UNSIGNALED_EVENT_SLOT;
}
static bool is_slot_signaled(struct kfd_process *p, unsigned int index)
{
if (!p->signal_page)
return false;
else
return page_slots(p->signal_page)[index] !=
UNSIGNALED_EVENT_SLOT;
} }
static void set_event_from_interrupt(struct kfd_process *p, static void set_event_from_interrupt(struct kfd_process *p,
...@@ -518,13 +409,31 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, ...@@ -518,13 +409,31 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
* ignore it, but we could use any bits we did receive to * ignore it, but we could use any bits we did receive to
* search faster. * search faster.
*/ */
unsigned int i; uint64_t *slots = page_slots(p->signal_page);
uint32_t id;
if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
/* With relatively few events, it's faster to
* iterate over the event IDR
*/
idr_for_each_entry(&p->event_idr, ev, id) {
if (id >= KFD_SIGNAL_EVENT_LIMIT)
break;
for (i = 0; i < SLOTS_PER_PAGE; i++) if (slots[id] != UNSIGNALED_EVENT_SLOT)
if (is_slot_signaled(p, i)) { set_event_from_interrupt(p, ev);
ev = lookup_event_by_page_slot(p, i);
set_event_from_interrupt(p, ev);
} }
} else {
/* With relatively many events, it's faster to
* iterate over the signal slots and lookup
* only signaled events from the IDR.
*/
for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
if (slots[id] != UNSIGNALED_EVENT_SLOT) {
ev = lookup_event_by_id(p, id);
set_event_from_interrupt(p, ev);
}
}
} }
mutex_unlock(&p->event_mutex); mutex_unlock(&p->event_mutex);
...@@ -836,12 +745,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, ...@@ -836,12 +745,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
{ {
struct kfd_hsa_memory_exception_data *ev_data; struct kfd_hsa_memory_exception_data *ev_data;
struct kfd_event *ev; struct kfd_event *ev;
int bkt; uint32_t id;
bool send_signal = true; bool send_signal = true;
ev_data = (struct kfd_hsa_memory_exception_data *) event_data; ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
hash_for_each(p->events, bkt, ev, events) id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == type) { if (ev->type == type) {
send_signal = false; send_signal = false;
dev_dbg(kfd_device, dev_dbg(kfd_device,
......
...@@ -31,9 +31,13 @@ ...@@ -31,9 +31,13 @@
#include "kfd_priv.h" #include "kfd_priv.h"
#include <uapi/linux/kfd_ioctl.h> #include <uapi/linux/kfd_ioctl.h>
#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U /*
#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK * IDR supports non-negative integer IDs. Small IDs are used for
#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX * signal events to match their signal slot. Use the upper half of the
* ID space for non-signal events.
*/
#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX
/* /*
* Written into kfd_signal_slot_t to indicate that the event is not signaled. * Written into kfd_signal_slot_t to indicate that the event is not signaled.
...@@ -47,9 +51,6 @@ struct kfd_event_waiter; ...@@ -47,9 +51,6 @@ struct kfd_event_waiter;
struct signal_page; struct signal_page;
struct kfd_event { struct kfd_event {
/* All events in process, rooted at kfd_process.events. */
struct hlist_node events;
u32 event_id; u32 event_id;
bool signaled; bool signaled;
...@@ -60,7 +61,6 @@ struct kfd_event { ...@@ -60,7 +61,6 @@ struct kfd_event {
wait_queue_head_t wq; /* List of event waiters. */ wait_queue_head_t wq; /* List of event waiters. */
/* Only for signal events. */ /* Only for signal events. */
unsigned int signal_slot_index;
uint64_t __user *user_signal_address; uint64_t __user *user_signal_address;
/* type specific data */ /* type specific data */
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/kfd_ioctl.h> #include <linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <kgd_kfd_interface.h> #include <kgd_kfd_interface.h>
#include "amd_shared.h" #include "amd_shared.h"
...@@ -538,11 +539,10 @@ struct kfd_process { ...@@ -538,11 +539,10 @@ struct kfd_process {
/* Event-related data */ /* Event-related data */
struct mutex event_mutex; struct mutex event_mutex;
/* All events in process hashed by ID, linked on kfd_event.events. */ /* Event ID allocator and lookup */
DECLARE_HASHTABLE(events, 4); struct idr event_idr;
/* Event page */ /* Event page */
struct kfd_signal_page *signal_page; struct kfd_signal_page *signal_page;
u32 next_nonsignal_event_id;
size_t signal_event_count; size_t signal_event_count;
bool signal_event_limit_reached; bool signal_event_limit_reached;
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册