diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h index 14a3f8556ace46362a96c80851c4e656d5d7dcee..12fb0fb330ee95895360a5ddc4287e3efc0917d2 100644 --- a/arch/tile/include/asm/cacheflush.h +++ b/arch/tile/include/asm/cacheflush.h @@ -138,55 +138,12 @@ static inline void finv_buffer(void *buffer, size_t size) } /* - * Flush & invalidate a VA range that is homed remotely on a single core, - * waiting until the memory controller holds the flushed values. + * Flush and invalidate a VA range that is homed remotely, waiting + * until the memory controller holds the flushed values. If "hfh" is + * true, we will do a more expensive flush involving additional loads + * to make sure we have touched all the possible home cpus of a buffer + * that is homed with "hash for home". */ -static inline void finv_buffer_remote(void *buffer, size_t size) -{ - char *p; - int i; - - /* - * Flush and invalidate the buffer out of the local L1/L2 - * and request the home cache to flush and invalidate as well. - */ - __finv_buffer(buffer, size); - - /* - * Wait for the home cache to acknowledge that it has processed - * all the flush-and-invalidate requests. This does not mean - * that the flushed data has reached the memory controller yet, - * but it does mean the home cache is processing the flushes. - */ - __insn_mf(); - - /* - * Issue a load to the last cache line, which can't complete - * until all the previously-issued flushes to the same memory - * controller have also completed. If we weren't striping - * memory, that one load would be sufficient, but since we may - * be, we also need to back up to the last load issued to - * another memory controller, which would be the point where - * we crossed an 8KB boundary (the granularity of striping - * across memory controllers). Keep backing up and doing this - * until we are before the beginning of the buffer, or have - * hit all the controllers. - */ - for (i = 0, p = (char *)buffer + size - 1; - i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer; - ++i) { - const unsigned long STRIPE_WIDTH = 8192; - - /* Force a load instruction to issue. */ - *(volatile char *)p; - - /* Jump to end of previous stripe. */ - p -= STRIPE_WIDTH; - p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1)); - } - - /* Wait for the loads (and thus flushes) to have completed. */ - __insn_mf(); -} +void finv_buffer_remote(void *buffer, size_t size, int hfh); #endif /* _ASM_TILE_CACHEFLUSH_H */ diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 11b6164c20970e9a47d87ad1a1a4a22a43c70574..35c1d8ca5f38132e9428db0297f22062dbbb68e4 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c @@ -21,3 +21,105 @@ void __flush_icache_range(unsigned long start, unsigned long end) { invalidate_icache((const void *)start, end - start, PAGE_SIZE); } + + +/* Force a load instruction to issue. */ +static inline void force_load(char *p) +{ + *(volatile char *)p; +} + +/* + * Flush and invalidate a VA range that is homed remotely on a single + * core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting + * until the memory controller holds the flushed values. + */ +void finv_buffer_remote(void *buffer, size_t size, int hfh) +{ + char *p, *base; + size_t step_size, load_count; + const unsigned long STRIPE_WIDTH = 8192; + + /* + * Flush and invalidate the buffer out of the local L1/L2 + * and request the home cache to flush and invalidate as well. + */ + __finv_buffer(buffer, size); + + /* + * Wait for the home cache to acknowledge that it has processed + * all the flush-and-invalidate requests. This does not mean + * that the flushed data has reached the memory controller yet, + * but it does mean the home cache is processing the flushes. + */ + __insn_mf(); + + /* + * Issue a load to the last cache line, which can't complete + * until all the previously-issued flushes to the same memory + * controller have also completed. If we weren't striping + * memory, that one load would be sufficient, but since we may + * be, we also need to back up to the last load issued to + * another memory controller, which would be the point where + * we crossed an 8KB boundary (the granularity of striping + * across memory controllers). Keep backing up and doing this + * until we are before the beginning of the buffer, or have + * hit all the controllers. + * + * If we are flushing a hash-for-home buffer, it's even worse. + * Each line may be homed on a different tile, and each tile + * may have up to four lines that are on different + * controllers. So as we walk backwards, we have to touch + * enough cache lines to satisfy these constraints. In + * practice this ends up being close enough to "load from + * every cache line on a full memory stripe on each + * controller" that we simply do that, to simplify the logic. + * + * FIXME: See bug 9535 for some issues with this code. + */ + if (hfh) { + step_size = L2_CACHE_BYTES; + load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * + (1 << CHIP_LOG_NUM_MSHIMS()); + } else { + step_size = STRIPE_WIDTH; + load_count = (1 << CHIP_LOG_NUM_MSHIMS()); + } + + /* Load the last byte of the buffer. */ + p = (char *)buffer + size - 1; + force_load(p); + + /* Bump down to the end of the previous stripe or cache line. */ + p -= step_size; + p = (char *)((unsigned long)p | (step_size - 1)); + + /* Figure out how far back we need to go. */ + base = p - (step_size * (load_count - 2)); + if ((long)base < (long)buffer) + base = buffer; + + /* + * Fire all the loads we need. The MAF only has eight entries + * so we can have at most eight outstanding loads, so we + * unroll by that amount. + */ +#pragma unroll 8 + for (; p >= base; p -= step_size) + force_load(p); + + /* + * Repeat, but with inv's instead of loads, to get rid of the + * data we just loaded into our own cache and the old home L3. + * No need to unroll since inv's don't target a register. + */ + p = (char *)buffer + size - 1; + __insn_inv(p); + p -= step_size; + p = (char *)((unsigned long)p | (step_size - 1)); + for (; p >= base; p -= step_size) + __insn_inv(p); + + /* Wait for the load+inv's (and thus finvs) to have completed. */ + __insn_mf(); +} diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index d78df3a6ee15e0d30cd2207d1f64df88de2ec590..f344f4fc734235c6e1aaae9cadb1a8ffc2f9a914 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -179,23 +179,46 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, panic("Unsafe to continue."); } +void flush_remote_page(struct page *page, int order) +{ + int i, pages = (1 << order); + for (i = 0; i < pages; ++i, ++page) { + void *p = kmap_atomic(page); + int hfh = 0; + int home = page_home(page); +#if CHIP_HAS_CBOX_HOME_MAP() + if (home == PAGE_HOME_HASH) + hfh = 1; + else +#endif + BUG_ON(home < 0 || home >= NR_CPUS); + finv_buffer_remote(p, PAGE_SIZE, hfh); + kunmap_atomic(p); + } +} + void homecache_evict(const struct cpumask *mask) { flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); } -/* Return a mask of the cpus whose caches currently own these pages. */ -static void homecache_mask(struct page *page, int pages, - struct cpumask *home_mask) +/* + * Return a mask of the cpus whose caches currently own these pages. + * The return value is whether the pages are all coherently cached + * (i.e. none are immutable, incoherent, or uncached). + */ +static int homecache_mask(struct page *page, int pages, + struct cpumask *home_mask) { int i; + int cached_coherently = 1; cpumask_clear(home_mask); for (i = 0; i < pages; ++i) { int home = page_home(&page[i]); if (home == PAGE_HOME_IMMUTABLE || home == PAGE_HOME_INCOHERENT) { cpumask_copy(home_mask, cpu_possible_mask); - return; + return 0; } #if CHIP_HAS_CBOX_HOME_MAP() if (home == PAGE_HOME_HASH) { @@ -203,11 +226,14 @@ static void homecache_mask(struct page *page, int pages, continue; } #endif - if (home == PAGE_HOME_UNCACHED) + if (home == PAGE_HOME_UNCACHED) { + cached_coherently = 0; continue; + } BUG_ON(home < 0 || home >= NR_CPUS); cpumask_set_cpu(home, home_mask); } + return cached_coherently; } /* diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c index 7cb301da747440dd9d14d8e68aced335450b2be0..f9012992d21e5ccf973d7ab5df2bd15251de7ded 100644 --- a/drivers/net/tile/tilepro.c +++ b/drivers/net/tile/tilepro.c @@ -1620,7 +1620,7 @@ static unsigned int tile_net_tx_frags(lepp_frag_t *frags, if (b_len != 0) { if (!hash_default) - finv_buffer_remote(b_data, b_len); + finv_buffer_remote(b_data, b_len, 0); cpa = __pa(b_data); frags[n].cpa_lo = cpa; @@ -1643,7 +1643,7 @@ static unsigned int tile_net_tx_frags(lepp_frag_t *frags, if (!hash_default) { void *va = pfn_to_kaddr(pfn) + f->page_offset; BUG_ON(PageHighMem(f->page)); - finv_buffer_remote(va, f->size); + finv_buffer_remote(va, f->size, 0); } cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset;