commit 923f7f69 authored by Jack Steiner, committed by Linus Torvalds

GRU driver: minor updates

A few minor updates for the GRU driver.
	- documentation changes found in code reviews
	- changes to #ifdefs to make them recognized by "unifdef"
	  (used in simulator testing; see the sketch below)
	- change GRU context load/unload to prefetch data
	  (see the prefetch_data() sketch at the end of the diff)
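
The unifdef bullet is worth a concrete illustration. unifdef resolves #if conditionals from -D/-U options given on its command line; the unparenthesized "defined SYM" form was evidently what the tool failed to recognize. A minimal before/after sketch, assuming a typical invocation (the exact flags used in the simulator tests are not shown in this commit):

	/* Before: the unparenthesized form, not recognized by the tool */
	#if defined CONFIG_IA64
	#define GRU_GSEG_PAGESIZE	(256 * 1024UL)
	#endif

	/* After: with "unifdef -DCONFIG_IA64" this collapses to just the
	 * #define, since the conditional can now be evaluated */
	#if defined(CONFIG_IA64)
	#define GRU_GSEG_PAGESIZE	(256 * 1024UL)
	#endif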

[akpm@linux-foundation.org: fix typo in comment]
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent ebf3f09c
@@ -30,9 +30,9 @@
 /*
  * Size used to map GRU GSeg
  */
-#if defined CONFIG_IA64
+#if defined(CONFIG_IA64)
 #define GRU_GSEG_PAGESIZE	(256 * 1024UL)
-#elif defined CONFIG_X86_64
+#elif defined(CONFIG_X86_64)
 #define GRU_GSEG_PAGESIZE	(256 * 1024UL)	/* ZZZ 2MB ??? */
 #else
 #error "Unsupported architecture"
...
@@ -26,7 +26,7 @@
  * Architecture dependent functions
  */

-#if defined CONFIG_IA64
+#if defined(CONFIG_IA64)
 #include <linux/compiler.h>
 #include <asm/intrinsics.h>
 #define __flush_cache(p)		ia64_fc(p)
@@ -36,7 +36,7 @@
 		barrier();					\
 		*((volatile int *)(p)) = v; /* force st.rel */	\
 	} while (0)
-#elif defined CONFIG_X86_64
+#elif defined(CONFIG_X86_64)
 #define __flush_cache(p)		clflush(p)
 #define gru_ordered_store_int(p,v)			\
 	do {						\
@@ -299,6 +299,7 @@ static inline void gru_flush_cache(void *p)
 static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
 {
 	gru_ordered_store_int(ins, op32);
+	gru_flush_cache(ins);
 }
@@ -604,8 +605,9 @@ static inline int gru_get_cb_substatus(void *cb)
 static inline int gru_check_status(void *cb)
 {
 	struct gru_control_block_status *cbs = (void *)cb;
-	int ret = cbs->istatus;
+	int ret;

+	ret = cbs->istatus;
 	if (ret == CBS_CALL_OS)
 		ret = gru_check_status_proc(cb);
 	return ret;
@@ -617,7 +619,7 @@ static inline int gru_check_status(void *cb)
 static inline int gru_wait(void *cb)
 {
 	struct gru_control_block_status *cbs = (void *)cb;
-	int ret = cbs->istatus;;
+	int ret = cbs->istatus;

 	if (ret != CBS_IDLE)
 		ret = gru_wait_proc(cb);
...
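
One functional change hides among the #ifdef cleanups above: gru_start_instruction() now flushes the cache line after the ordered store. An annotated sketch of the resulting function; the comments are editorial, inferred from the __flush_cache() definitions in the same header (clflush() on x86_64, ia64_fc() on IA64):

	static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
	{
		/* barrier + volatile store: publish the opcode word last */
		gru_ordered_store_int(ins, op32);
		/* then push the line out of the CPU cache so the GRU (or
		 * the simulator) observes the new instruction promptly */
		gru_flush_cache(ins);
	}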
@@ -214,12 +214,14 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
 }

 /*
- *
  * atomic_pte_lookup
 *
 * Convert a user virtual address to a physical address
 * Only supports Intel large pages (2MB only) on x86_64.
 *	ZZZ - hugepage support is incomplete
+ *
+ * NOTE: mmap_sem is already held on entry to this function. This
+ * guarantees existence of the page tables.
 */
 static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
 	int write, unsigned long *paddr, int *pageshift)
@@ -229,9 +231,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
 	pud_t *pudp;
 	pte_t pte;

-	WARN_ON(irqs_disabled());		/* ZZZ debug */
-
-	local_irq_disable();
 	pgdp = pgd_offset(vma->vm_mm, vaddr);
 	if (unlikely(pgd_none(*pgdp)))
 		goto err;
@@ -250,8 +249,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
 #endif
 	pte = *pte_offset_kernel(pmdp, vaddr);

-	local_irq_enable();
-
 	if (unlikely(!pte_present(pte) ||
 		     (write && (!pte_write(pte) || !pte_dirty(pte)))))
 		return 1;
@@ -324,6 +321,7 @@ static int gru_try_dropin(struct gru_thread_state *gts,
 	 * Atomic lookup is faster & usually works even if called in non-atomic
 	 * context.
 	 */
+	rmb();	/* Must/check ms_range_active before loading PTEs */
 	ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift);
 	if (ret) {
 		if (!cb)
@@ -543,6 +541,7 @@ int gru_get_exception_detail(unsigned long arg)
 		ucbnum = get_cb_number((void *)excdet.cb);
 		cbrnum = thread_cbr_number(gts, ucbnum);
 		cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
+		prefetchw(cbe);	/* Harmless on hardware, required for emulator */
 		excdet.opc = cbe->opccpy;
 		excdet.exopc = cbe->exopccpy;
 		excdet.ecause = cbe->ecause;
...
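
Two of the grufault.c changes are about ordering rather than speed: dropping the irq disable/enable bracket is safe because mmap_sem, held on entry, pins the page tables; and the new rmb() keeps the ms_range_active check from being reordered after the PTE loads. A condensed sketch of the lookup's shape after the patch, using the stock page-table helpers (error paths trimmed; not the driver's literal code):

	/* Caller holds mmap_sem for read, so the tables cannot vanish. */
	pgdp = pgd_offset(vma->vm_mm, vaddr);
	pudp = pud_offset(pgdp, vaddr);
	pmdp = pmd_offset(pudp, vaddr);
	pte = *pte_offset_kernel(pmdp, vaddr);

	if (unlikely(!pte_present(pte) ||
		     (write && (!pte_write(pte) || !pte_dirty(pte)))))
		return 1;	/* presumably falls back to non_atomic_pte_lookup() */
	*paddr = pte_pfn(pte) << PAGE_SHIFT;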
@@ -113,7 +113,7 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
 		return -EPERM;

 	if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
-	    vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
+			vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
 		return -EINVAL;

 	vma->vm_flags |=
@@ -398,6 +398,12 @@ static int __init gru_init(void)
 	irq = get_base_irq();
 	for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
 		ret = request_irq(irq + chip, gru_intr, 0, id, NULL);
+		/* TODO: fix irq handling on x86. For now ignore failures because
+		 * interrupts are not required & not yet fully supported */
+		if (ret) {
+			printk("!!!WARNING: GRU ignoring request failure!!!\n");
+			ret = 0;
+		}
 		if (ret) {
 			printk(KERN_ERR "%s: request_irq failed\n",
 				GRU_DRIVER_ID_STR);
...
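
Note what the new block in gru_init() does to the code after it: because ret is forced back to 0, the pre-existing failure branch can no longer fire. Condensed control flow after the patch (sketch; the error-path label is hypothetical, its real target lies outside this hunk):

	ret = request_irq(irq + chip, gru_intr, 0, id, NULL);
	if (ret) {			/* new: tolerate the failure */
		printk("!!!WARNING: GRU ignoring request failure!!!\n");
		ret = 0;
	}
	if (ret) {			/* now unreachable */
		printk(KERN_ERR "%s: request_irq failed\n", GRU_DRIVER_ID_STR);
		goto cleanup;		/* hypothetical label */
	}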
@@ -91,12 +91,7 @@
 #define GSEGPOFF(h)	((h) & (GRU_SIZE - 1))

 /* Convert an arbitrary handle address to the beginning of the GRU segment */
-#ifndef __PLUGIN__
 #define GRUBASE(h)	((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
-#else
-extern void *gmu_grubase(void *h);
-#define GRUBASE(h)	gmu_grubase(h)
-#endif

 /* General addressing macros. */
 static inline void *get_gseg_base_address(void *base, int ctxnum)
...
@@ -122,6 +122,7 @@ int gru_get_cb_exception_detail(void *cb,
 	struct gru_control_block_extended *cbe;

 	cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
+	prefetchw(cbe);	/* Harmless on hardware, required for emulator */
 	excdet->opc = cbe->opccpy;
 	excdet->exopc = cbe->exopccpy;
 	excdet->ecause = cbe->ecause;
...
@@ -432,29 +432,35 @@ static inline long gru_copy_handle(void *d, void *s)
 	return GRU_HANDLE_BYTES;
 }

-/* rewrite in assembly & use lots of prefetch */
-static void gru_load_context_data(void *save, void *grubase, int ctxnum,
-				  unsigned long cbrmap, unsigned long dsrmap)
+static void gru_prefetch_context(void *gseg, void *cb, void *cbe, unsigned long cbrmap,
+				 unsigned long length)
 {
-	void *gseg, *cb, *cbe;
-	unsigned long length;
 	int i, scr;

-	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
-	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
 	prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
 		      GRU_CACHE_LINE_BYTES);

-	cb = gseg + GRU_CB_BASE;
-	cbe = grubase + GRU_CBE_BASE;
 	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
 		prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
 		prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
 			      GRU_CACHE_LINE_BYTES);
 		cb += GRU_HANDLE_STRIDE;
 	}
+}
+
+static void gru_load_context_data(void *save, void *grubase, int ctxnum,
+				  unsigned long cbrmap, unsigned long dsrmap)
+{
+	void *gseg, *cb, *cbe;
+	unsigned long length;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
 	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);

 	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
 		save += gru_copy_handle(cb, save);
 		save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
@@ -472,15 +478,16 @@ static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
 	int i, scr;

 	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
-
 	cb = gseg + GRU_CB_BASE;
 	cbe = grubase + GRU_CBE_BASE;
+	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
+
 	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
 		save += gru_copy_handle(save, cb);
 		save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
 		cb += GRU_HANDLE_STRIDE;
 	}
-	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;

 	memcpy(save, gseg + GRU_DS_BASE, length);
 }
...
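
The refactor above assumes a prefetch_data() helper that walks a region one cache line (or handle stride) at a time; its body is not part of this diff. A minimal sketch of such a helper, built on the kernel's prefetch() primitive (an assumption about the real implementation):

	#include <linux/prefetch.h>

	/* Touch "num" chunks spaced "stride" bytes apart with read prefetches. */
	static void prefetch_data(void *p, int num, int stride)
	{
		while (num-- > 0) {
			prefetch(p);
			p += stride;
		}
	}

Splitting the pass into gru_prefetch_context() lets both the load and unload paths warm the cache for the CBRs, the CBEs and the data segment before gru_copy_handle() and memcpy() walk them.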