diff options
Diffstat (limited to 'kernel/arch/metag/mm/cache.c')
-rw-r--r-- | kernel/arch/metag/mm/cache.c | 521 |
1 files changed, 521 insertions, 0 deletions
diff --git a/kernel/arch/metag/mm/cache.c b/kernel/arch/metag/mm/cache.c
new file mode 100644
index 000000000..a62285284
--- /dev/null
+++ b/kernel/arch/metag/mm/cache.c
@@ -0,0 +1,521 @@
+/*
+ * arch/metag/mm/cache.c
+ *
+ * Copyright (C) 2001, 2002, 2005, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Cache control code
+ */
+
+#include <linux/export.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+
+#define DEFAULT_CACHE_WAYS_LOG2	2
+
+/*
+ * Size of a set in the caches. Initialised for default 16K stride, adjusted
+ * according to values passed through TBI global heap segment via LDLK (on ATP)
+ * or config registers (on HTP/MTP)
+ */
+static int dcache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
+					- DEFAULT_CACHE_WAYS_LOG2;
+static int icache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
+					- DEFAULT_CACHE_WAYS_LOG2;
+/*
+ * The number of sets in the caches. Initialised for HTP/ATP, adjusted
+ * according to NOMMU setting in config registers
+ */
+static unsigned char dcache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
+static unsigned char icache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
+
+#ifndef CONFIG_METAG_META12
+/**
+ * metag_lnkget_probe() - Probe whether lnkget/lnkset go around the cache
+ *
+ * This comment documents metag_lnkget_probe(), which is defined below the
+ * test buffer and the constant that it uses.
+ *
+ * The probe reads lnkget_testdata[0] with a normal load, zeroes it, stores
+ * LNKGET_CONSTANT to it with an LNKGETD/LNKSETD sequence and then reads it
+ * back with a normal load. Reading back LNKGET_CONSTANT is reported as
+ * "LNKGET/SET go through cache", any other value as "go around cache".
+ * The result is only reported (pr_info/pr_warn/WARN); which message is
+ * compiled in depends on CONFIG_METAG_LNKGET_AROUND_CACHE,
+ * CONFIG_METAG_ATOMICITY_LNKGET and CONFIG_SMP.
+ */
+static volatile u32 lnkget_testdata[16] __initdata __aligned(64);
+
+#define LNKGET_CONSTANT 0xdeadbeef
+
+static void __init metag_lnkget_probe(void)
+{
+	int temp;
+	long flags;
+
+	/*
+	 * It's conceivable the user has configured a globally coherent cache
+	 * shared with non-Linux hardware threads, so use LOCK2 to prevent them
+	 * from executing and causing cache eviction during the test.
+	 */
+	__global_lock2(flags);
+
+	/* read a value to bring it into the cache */
+	(void)lnkget_testdata[0];
+	lnkget_testdata[0] = 0;
+
+	/*
+	 * lnkget/lnkset it to modify it. The sequence branches back to label 1
+	 * (BNZ 1b) and retries until the TXSTAT bits selected by 0x3f000000
+	 * compare equal to 0x02000000 (presumably "linked store succeeded";
+	 * confirm against the Meta ISA documentation).
+	 */
+	asm volatile(
+		"1: LNKGETD %0, [%1]\n"
+		" LNKSETD [%1], %2\n"
+		" DEFR %0, TXSTAT\n"
+		" ANDT %0, %0, #HI(0x3f000000)\n"
+		" CMPT %0, #HI(0x02000000)\n"
+		" BNZ 1b\n"
+		: "=&d" (temp)
+		: "da" (&lnkget_testdata[0]), "bd" (LNKGET_CONSTANT)
+		: "cc");
+
+	/* re-read it to see if the cached value changed */
+	temp = lnkget_testdata[0];
+
+	__global_unlock2(flags);
+
+	/* flush the cache line to fix any incoherency */
+	__builtin_dcache_flush((void *)&lnkget_testdata[0]);
+
+#if defined(CONFIG_METAG_LNKGET_AROUND_CACHE)
+	/* if the cache is right, LNKGET_AROUND_CACHE is unnecessary */
+	if (temp == LNKGET_CONSTANT)
+		pr_info("LNKGET/SET go through cache but CONFIG_METAG_LNKGET_AROUND_CACHE=y\n");
+#elif defined(CONFIG_METAG_ATOMICITY_LNKGET)
+	/*
+	 * if the cache is wrong, LNKGET_AROUND_CACHE is really necessary
+	 * because the kernel is configured to use LNKGET/SET for atomicity
+	 */
+	WARN(temp != LNKGET_CONSTANT,
+	     "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
+	     "Expect kernel failure as it's used for atomicity primitives\n");
+#elif defined(CONFIG_SMP)
+	/*
+	 * if the cache is wrong, LNKGET_AROUND_CACHE should be used or the
+	 * gateway page won't flush and userland could break.
+	 */
+	WARN(temp != LNKGET_CONSTANT,
+	     "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
+	     "Expect userland failure as it's used for user gateway page\n");
+#else
+	/*
+	 * if the cache is wrong, LNKGET_AROUND_CACHE is set wrong, but it
+	 * doesn't actually matter as it doesn't have any effect on !SMP &&
+	 * !ATOMICITY_LNKGET.
+	 */
+	if (temp != LNKGET_CONSTANT)
+		pr_warn("LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n");
+#endif
+}
+#endif /* !CONFIG_METAG_META12 */
+
+/**
+ * metag_cache_probe() - Probe L1 cache configuration.
+ *
+ * Probe the L1 cache configuration to aid the L1 physical cache flushing
+ * functions.
+ *
+ * On CONFIG_METAG_META12 builds icache_set_shift and dcache_set_shift are
+ * derived from the size fields in word 1 of the TBI global heap segment
+ * (they keep their defaults if the segment is not found). Otherwise they are
+ * derived from METAC_CORE_CONFIG2, icache_sets_log2/dcache_sets_log2 are set
+ * to 1 for the TYPE0 and PRIVNOMMU cache configurations reported by
+ * METAC_CORE_ID, and metag_lnkget_probe() is run.
+ */
+void __init metag_cache_probe(void)
+{
+#ifndef CONFIG_METAG_META12
+	int coreid = metag_in32(METAC_CORE_ID);
+	int config = metag_in32(METAC_CORE_CONFIG2);
+	int cfgcache = coreid & METAC_COREID_CFGCACHE_BITS;
+
+	if (cfgcache == METAC_COREID_CFGCACHE_TYPE0 ||
+	    cfgcache == METAC_COREID_CFGCACHE_PRIVNOMMU) {
+		icache_sets_log2 = 1;
+		dcache_sets_log2 = 1;
+	}
+
+	/*
+	 * For normal size caches, the smallest size is 4KB (shift of 12).
+	 * For small caches, the smallest size is 64 bytes (shift of 6).
+	 * The size field from the config register is added on top, and the
+	 * log2 number of sets is subtracted to get the per-set shift.
+	 */
+	icache_set_shift = (config & METAC_CORECFG2_ICSMALL_BIT)
+				? 6 : 12;
+	icache_set_shift += (config & METAC_CORE_C2ICSZ_BITS)
+				>> METAC_CORE_C2ICSZ_S;
+	icache_set_shift -= icache_sets_log2;
+
+	dcache_set_shift = (config & METAC_CORECFG2_DCSMALL_BIT)
+				? 6 : 12;
+	dcache_set_shift += (config & METAC_CORECFG2_DCSZ_BITS)
+				>> METAC_CORECFG2_DCSZ_S;
+	dcache_set_shift -= dcache_sets_log2;
+
+	metag_lnkget_probe();
+#else
+	/* Extract cache sizes from global heap segment */
+	unsigned long val, u;
+	int width, shift, addend;
+	PTBISEG seg;
+
+	seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL,
+					  TBID_SEGSCOPE_GLOBAL,
+					  TBID_SEGTYPE_HEAP));
+	if (seg != NULL) {
+		val = seg->Data[1];
+
+		/*
+		 * Work out width of I-cache size bit-field by counting the
+		 * contiguous set bits of the field mask once it has been
+		 * shifted down to bit 0.
+		 */
+		u = ((unsigned long) METAG_TBI_ICACHE_SIZE_BITS)
+			>> METAG_TBI_ICACHE_SIZE_S;
+		width = 0;
+		while (u & 1) {
+			width++;
+			u >>= 1;
+		}
+		/*
+		 * Extract sign-extended size addend value: move the field to
+		 * the top of a 32-bit word, then shift it back down through a
+		 * signed type.
+		 */
+		shift = 32 - (METAG_TBI_ICACHE_SIZE_S + width);
+		addend = (long) ((val & METAG_TBI_ICACHE_SIZE_BITS)
+				 << shift)
+			>> (shift + METAG_TBI_ICACHE_SIZE_S);
+		/* Now calculate I-cache set size */
+		icache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
+				    - DEFAULT_CACHE_WAYS_LOG2)
+					+ addend;
+
+		/* Similarly for D-cache */
+		u = ((unsigned long) METAG_TBI_DCACHE_SIZE_BITS)
+			>> METAG_TBI_DCACHE_SIZE_S;
+		width = 0;
+		while (u & 1) {
+			width++;
+			u >>= 1;
+		}
+		shift = 32 - (METAG_TBI_DCACHE_SIZE_S + width);
+		addend = (long) ((val & METAG_TBI_DCACHE_SIZE_BITS)
+				 << shift)
+			>> (shift + METAG_TBI_DCACHE_SIZE_S);
+		dcache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
+				    - DEFAULT_CACHE_WAYS_LOG2)
+					+ addend;
+	}
+#endif
+}
+/**
+ * metag_phys_data_cache_flush() - Flush a data cache partition by writing to
+ *				   the physical cache flush region.
+ * @start:	Only the sign of the address is used: when it is negative as an
+ *		int, the global partition fields of the partition register are
+ *		used instead of the local ones.
+ *
+ * Reads the data cache partition register for the thread number found in
+ * TXENABLE and walks that partition through the LINSYSCFLUSH_DCACHE_LINE
+ * region, writing a zero byte to each address visited. Four addresses are
+ * written per loop iteration: one in each of four sets, or two consecutive
+ * 64-byte lines in each of two sets when dcache_sets_log2 is 1.
+ */
+static void metag_phys_data_cache_flush(const void *start)
+{
+	unsigned long flush0, flush1, flush2, flush3;
+	int loops, step;
+	int thread;
+	int part, offset;
+	int set_shift;
+
+	/* Use a sequence of writes to flush the cache region requested */
+	thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
+					  >> TXENABLE_THREAD_S;
+
+	/* Cache is broken into sets which lie in contiguous RAMs */
+	set_shift = dcache_set_shift;
+
+	/* Move to the base of the physical cache flush region */
+	flush0 = LINSYSCFLUSH_DCACHE_LINE;
+	step = 64;
+
+	/* Get partition data for this thread */
+	part = metag_in32(SYSC_DCPART0 +
+			      (SYSC_xCPARTn_STRIDE * thread));
+
+	if ((int)start < 0)
+		/* Access Global vs Local partition */
+		part >>= SYSC_xCPARTG_AND_S
+			- SYSC_xCPARTL_AND_S;
+
+	/* Extract offset and move SetOff */
+	offset = (part & SYSC_xCPARTL_OR_BITS)
+			>> SYSC_xCPARTL_OR_S;
+	flush0 += (offset << (set_shift - 4));
+
+	/* Shrink size: loops is the partition's share of one set, in bytes */
+	part = (part & SYSC_xCPARTL_AND_BITS)
+			>> SYSC_xCPARTL_AND_S;
+	loops = ((part + 1) << (set_shift - 4));
+
+	/* Reduce loops by step of cache line size */
+	loops /= step;
+
+	flush1 = flush0 + (1 << set_shift);
+	flush2 = flush0 + (2 << set_shift);
+	flush3 = flush0 + (3 << set_shift);
+
+	/*
+	 * With only two sets, use the four pointers as two adjacent lines in
+	 * each set, so each iteration covers twice the distance and half as
+	 * many iterations are needed.
+	 */
+	if (dcache_sets_log2 == 1) {
+		flush2 = flush1;
+		flush3 = flush1 + step;
+		flush1 = flush0 + step;
+		step <<= 1;
+		loops >>= 1;
+	}
+
+	/* Clear loops ways in cache */
+	while (loops-- != 0) {
+		/* Clear the ways. */
+#if 0
+		/*
+		 * GCC doesn't generate very good code for this so we
+		 * provide inline assembly instead.
+		 */
+		metag_out8(0, flush0);
+		metag_out8(0, flush1);
+		metag_out8(0, flush2);
+		metag_out8(0, flush3);
+
+		flush0 += step;
+		flush1 += step;
+		flush2 += step;
+		flush3 += step;
+#else
+		asm volatile (
+			"SETB\t[%0+%4++],%5\n"
+			"SETB\t[%1+%4++],%5\n"
+			"SETB\t[%2+%4++],%5\n"
+			"SETB\t[%3+%4++],%5\n"
+			: "+e" (flush0),
+			  "+e" (flush1),
+			  "+e" (flush2),
+			  "+e" (flush3)
+			: "e" (step), "a" (0));
+#endif
+	}
+}
+/**
+ * metag_data_cache_flush_all() - Flush the data cache partition for @start.
+ * @start:	Address used to choose between the local and global partition;
+ *		it is passed straight to metag_phys_data_cache_flush().
+ *
+ * Does nothing when SYSC_CMMUCFG_DC_ON_BIT is clear in SYSC_CACHE_MMU_CONFIG.
+ */
+void metag_data_cache_flush_all(const void *start)
+{
+	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
+		/* No need to flush the data cache if it's not actually enabled */
+		return;
+
+	metag_phys_data_cache_flush(start);
+}
+/**
+ * metag_data_cache_flush() - Flush the data cache for a range of addresses.
+ * @start:	First address of the range.
+ * @bytes:	Length of the range in bytes.
+ *
+ * Does nothing when SYSC_CMMUCFG_DC_ON_BIT is clear in SYSC_CACHE_MMU_CONFIG.
+ * Ranges of 4096 bytes or more are handled by flushing the whole partition
+ * with metag_phys_data_cache_flush(); smaller ranges are flushed one cache
+ * line at a time with __builtin_dcache_flush(), up to four lines per loop
+ * iteration.
+ *
+ * NOTE(review): the per-line address stride is the literal 64 while the line
+ * count is computed from DCACHE_LINE_BYTES/DCACHE_LINE_S, which are defined
+ * elsewhere; confirm that they describe 64-byte lines.
+ */
+void metag_data_cache_flush(const void *start, int bytes)
+{
+	unsigned long flush0;
+	int loops, step;
+
+	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
+		/* No need to flush the data cache if it's not actually enabled */
+		return;
+
+	if (bytes >= 4096) {
+		metag_phys_data_cache_flush(start);
+		return;
+	}
+
+	/*
+	 * Use linear cache flush mechanism on META IP. loops becomes the
+	 * number of cache lines touched by the range, counting the partial
+	 * lines at either end.
+	 */
+	flush0 = (int)start;
+	loops = ((int)start & (DCACHE_LINE_BYTES - 1)) + bytes +
+					(DCACHE_LINE_BYTES - 1);
+	loops >>= DCACHE_LINE_S;
+
+#define PRIM_FLUSH(addr, offset) do {			\
+	int __addr = ((int) (addr)) + ((offset) * 64);	\
+	__builtin_dcache_flush((void *)(__addr));	\
+	} while (0)
+
+#define LOOP_INC (4*64)
+
+	do {
+		/* By default stop */
+		step = 0;
+
+		switch (loops) {
+		/*
+		 * Deliberate fall-through: every case drops into the one below
+		 * it. Four or more lines left takes the default case, flushes
+		 * four lines and goes round again; 1-3 lines left enters at
+		 * the matching case and finishes.
+		 */
+		default:
+			PRIM_FLUSH(flush0, 3);
+			loops -= 4;
+			step = 1;
+		case 3:
+			PRIM_FLUSH(flush0, 2);
+		case 2:
+			PRIM_FLUSH(flush0, 1);
+		case 1:
+			PRIM_FLUSH(flush0, 0);
+			flush0 += LOOP_INC;
+		case 0:
+			break;
+		}
+	} while (step);
+}
+EXPORT_SYMBOL(metag_data_cache_flush);
+/**
+ * metag_phys_code_cache_flush() - Flush a code cache partition by writing to
+ *				   the physical cache flush region.
+ * @start:	Address being flushed. Its sign selects the global (negative as
+ *		an int) or local partition fields; on CONFIG_METAG_META12 its
+ *		low bits also pick the starting line for small flushes.
+ * @bytes:	Length in bytes. Only consulted on CONFIG_METAG_META12, where a
+ *		length below both 4096 and the computed set size limits the
+ *		flush to the lines that range can map to.
+ *
+ * Reads the code cache partition register for the thread number found in
+ * TXENABLE and walks that partition through the LINSYSCFLUSH_ICACHE_LINE
+ * region, writing a zero byte to each address visited, four addresses per
+ * loop iteration. The addresses are moved back by the set size whenever
+ * flush0 reaches the end of the set.
+ */
+static void metag_phys_code_cache_flush(const void *start, int bytes)
+{
+	unsigned long flush0, flush1, flush2, flush3, end_set;
+	int loops, step;
+	int thread;
+	int set_shift, set_size;
+	int part, offset;
+
+	/* Use a sequence of writes to flush the cache region requested */
+	thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
+					  >> TXENABLE_THREAD_S;
+	set_shift = icache_set_shift;
+
+	/* Move to the base of the physical cache flush region */
+	flush0 = LINSYSCFLUSH_ICACHE_LINE;
+	step = 64;
+
+	/* Get partition code for this thread */
+	part = metag_in32(SYSC_ICPART0 +
+			  (SYSC_xCPARTn_STRIDE * thread));
+
+	if ((int)start < 0)
+		/* Access Global vs Local partition */
+		part >>= SYSC_xCPARTG_AND_S-SYSC_xCPARTL_AND_S;
+
+	/* Extract offset and move SetOff */
+	offset = (part & SYSC_xCPARTL_OR_BITS)
+			>> SYSC_xCPARTL_OR_S;
+	flush0 += (offset << (set_shift - 4));
+
+	/* Shrink size: loops is the partition's share of one set, in bytes */
+	part = (part & SYSC_xCPARTL_AND_BITS)
+			>> SYSC_xCPARTL_AND_S;
+	loops = ((part + 1) << (set_shift - 4));
+
+	/* Where does the Set end? */
+	end_set = flush0 + loops;
+	set_size = loops;
+
+#ifdef CONFIG_METAG_META12
+	if ((bytes < 4096) && (bytes < loops)) {
+		/* Unreachable on HTP/MTP */
+		/* Only target the sets that could be relevant */
+		flush0 += (loops - step) & ((int) start);
+		loops = (((int) start) & (step-1)) + bytes + step - 1;
+	}
+#endif
+
+	/* Reduce loops by step of cache line size */
+	loops /= step;
+
+	flush1 = flush0 + (1<<set_shift);
+	flush2 = flush0 + (2<<set_shift);
+	flush3 = flush0 + (3<<set_shift);
+
+	/*
+	 * With only two sets, use the four pointers as two adjacent lines in
+	 * each set, so each iteration covers twice the distance and half as
+	 * many iterations are needed.
+	 */
+	if (icache_sets_log2 == 1) {
+		flush2 = flush1;
+		flush3 = flush1 + step;
+		flush1 = flush0 + step;
+#if 0
+		/* flush0 will stop one line early in this case
+		 * (flush1 will do the final line).
+		 * However we don't correct end_set here at the moment
+		 * because it will never wrap on HTP/MTP
+		 */
+		end_set -= step;
+#endif
+		step <<= 1;
+		loops >>= 1;
+	}
+
+	/* Clear loops ways in cache */
+	while (loops-- != 0) {
+#if 0
+		/*
+		 * GCC doesn't generate very good code for this so we
+		 * provide inline assembly instead.
+		 */
+		/* Clear the ways */
+		metag_out8(0, flush0);
+		metag_out8(0, flush1);
+		metag_out8(0, flush2);
+		metag_out8(0, flush3);
+
+		flush0 += step;
+		flush1 += step;
+		flush2 += step;
+		flush3 += step;
+#else
+		asm volatile (
+			"SETB\t[%0+%4++],%5\n"
+			"SETB\t[%1+%4++],%5\n"
+			"SETB\t[%2+%4++],%5\n"
+			"SETB\t[%3+%4++],%5\n"
+			: "+e" (flush0),
+			  "+e" (flush1),
+			  "+e" (flush2),
+			  "+e" (flush3)
+			: "e" (step), "a" (0));
+#endif
+
+		if (flush0 == end_set) {
+			/* Wrap within Set 0 */
+			flush0 -= set_size;
+			flush1 -= set_size;
+			flush2 -= set_size;
+			flush3 -= set_size;
+		}
+	}
+}
+/**
+ * metag_code_cache_flush_all() - Flush the code cache partition for @start.
+ * @start:	Address used to choose between the local and global partition;
+ *		it is passed to metag_phys_code_cache_flush() with a length of
+ *		4096 bytes.
+ *
+ * Does nothing when SYSC_CMMUCFG_IC_ON_BIT is clear in SYSC_CACHE_MMU_CONFIG.
+ */
+void metag_code_cache_flush_all(const void *start)
+{
+	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
+		/* No need to flush the code cache if it's not actually enabled */
+		return;
+
+	metag_phys_code_cache_flush(start, 4096);
+}
+EXPORT_SYMBOL(metag_code_cache_flush_all);
+/**
+ * metag_code_cache_flush() - Flush the code cache for a range of addresses.
+ * @start:	First address of the range.
+ * @bytes:	Length of the range in bytes.
+ *
+ * Does nothing when SYSC_CMMUCFG_IC_ON_BIT is clear in SYSC_CACHE_MMU_CONFIG.
+ * On CONFIG_METAG_META12 the range is always handed to
+ * metag_phys_code_cache_flush(). Otherwise ranges of 4096 bytes or more go to
+ * metag_phys_code_cache_flush() and smaller ranges are flushed one cache line
+ * at a time with __builtin_meta2_cachewd(), up to four lines per loop
+ * iteration.
+ *
+ * NOTE(review): the per-line address stride is the literal 64 while the line
+ * count is computed from ICACHE_LINE_BYTES/ICACHE_LINE_S, which are defined
+ * elsewhere; confirm that they describe 64-byte lines.
+ */
+void metag_code_cache_flush(const void *start, int bytes)
+{
+#ifndef CONFIG_METAG_META12
+	void *flush;
+	int loops, step;
+#endif /* !CONFIG_METAG_META12 */
+
+	if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
+		/* No need to flush the code cache if it's not actually enabled */
+		return;
+
+#ifdef CONFIG_METAG_META12
+	/*
+	 * CACHEWD isn't available on Meta1, so always use the physical cache
+	 * flush (which narrows the range itself for small sizes).
+	 */
+	metag_phys_code_cache_flush(start, bytes);
+
+#else /* !CONFIG_METAG_META12 */
+	/* If large size do full physical cache flush */
+	if (bytes >= 4096) {
+		metag_phys_code_cache_flush(start, bytes);
+		return;
+	}
+
+	/*
+	 * Use linear cache flush mechanism on META IP. flush is @start rounded
+	 * down to a cache line boundary and loops becomes the number of cache
+	 * lines touched by the range.
+	 */
+	flush = (void *)((int)start & ~(ICACHE_LINE_BYTES-1));
+	loops = ((int)start & (ICACHE_LINE_BYTES-1)) + bytes +
+		(ICACHE_LINE_BYTES-1);
+	loops >>= ICACHE_LINE_S;
+
+#define PRIM_IFLUSH(addr, offset) \
+	__builtin_meta2_cachewd(((addr) + ((offset) * 64)), CACHEW_ICACHE_BIT)
+
+#define LOOP_INC (4*64)
+
+	do {
+		/* By default stop */
+		step = 0;
+
+		switch (loops) {
+		/*
+		 * Deliberate fall-through: every case drops into the one below
+		 * it. Four or more lines left takes the default case, flushes
+		 * four lines and goes round again; 1-3 lines left enters at
+		 * the matching case and finishes.
+		 */
+		default:
+			PRIM_IFLUSH(flush, 3);
+			loops -= 4;
+			step = 1;
+		case 3:
+			PRIM_IFLUSH(flush, 2);
+		case 2:
+			PRIM_IFLUSH(flush, 1);
+		case 1:
+			PRIM_IFLUSH(flush, 0);
+			flush += LOOP_INC;
+		case 0:
+			break;
+		}
+	} while (step);
+#endif /* !CONFIG_METAG_META12 */
+}
+EXPORT_SYMBOL(metag_code_cache_flush); |