522 lines
13 KiB
C
Executable File
522 lines
13 KiB
C
Executable File
/*
 * arch/metag/mm/cache.c
 *
 * Copyright (C) 2001, 2002, 2005, 2007, 2012 Imagination Technologies.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License version 2 as published by the
 * Free Software Foundation.
 *
 * Cache control code
 */
|
|
|
|
#include <linux/export.h>
|
|
#include <linux/io.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/core_reg.h>
|
|
#include <asm/global_lock.h>
|
|
#include <asm/metag_isa.h>
|
|
#include <asm/metag_mem.h>
|
|
#include <asm/metag_regs.h>
|
|
|
|
#define DEFAULT_CACHE_WAYS_LOG2 2
|
|
|
|
/*
|
|
* Size of a set in the caches. Initialised for default 16K stride, adjusted
|
|
* according to values passed through TBI global heap segment via LDLK (on ATP)
|
|
* or config registers (on HTP/MTP)
|
|
*/
|
|
static int dcache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
|
|
- DEFAULT_CACHE_WAYS_LOG2;
|
|
static int icache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
|
|
- DEFAULT_CACHE_WAYS_LOG2;
|
|
/*
|
|
* The number of sets in the caches. Initialised for HTP/ATP, adjusted
|
|
* according to NOMMU setting in config registers
|
|
*/
|
|
static unsigned char dcache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
|
|
static unsigned char icache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
|
|
|
|
#ifndef CONFIG_METAG_META12
|
|
/**
|
|
* metag_lnkget_probe() - Probe whether lnkget/lnkset go around the cache
|
|
*/
|
|
static volatile u32 lnkget_testdata[16] __initdata __aligned(64);
|
|
|
|
#define LNKGET_CONSTANT 0xdeadbeef
|
|
|
|
static void __init metag_lnkget_probe(void)
|
|
{
|
|
int temp;
|
|
long flags;
|
|
|
|
/*
|
|
* It's conceivable the user has configured a globally coherent cache
|
|
* shared with non-Linux hardware threads, so use LOCK2 to prevent them
|
|
* from executing and causing cache eviction during the test.
|
|
*/
|
|
__global_lock2(flags);
|
|
|
|
/* read a value to bring it into the cache */
|
|
(void)lnkget_testdata[0];
|
|
lnkget_testdata[0] = 0;
|
|
|
|
/* lnkget/lnkset it to modify it */
|
|
asm volatile(
|
|
"1: LNKGETD %0, [%1]\n"
|
|
" LNKSETD [%1], %2\n"
|
|
" DEFR %0, TXSTAT\n"
|
|
" ANDT %0, %0, #HI(0x3f000000)\n"
|
|
" CMPT %0, #HI(0x02000000)\n"
|
|
" BNZ 1b\n"
|
|
: "=&d" (temp)
|
|
: "da" (&lnkget_testdata[0]), "bd" (LNKGET_CONSTANT)
|
|
: "cc");
|
|
|
|
/* re-read it to see if the cached value changed */
|
|
temp = lnkget_testdata[0];
|
|
|
|
__global_unlock2(flags);
|
|
|
|
/* flush the cache line to fix any incoherency */
|
|
__builtin_dcache_flush((void *)&lnkget_testdata[0]);
|
|
|
|
#if defined(CONFIG_METAG_LNKGET_AROUND_CACHE)
|
|
/* if the cache is right, LNKGET_AROUND_CACHE is unnecessary */
|
|
if (temp == LNKGET_CONSTANT)
|
|
pr_info("LNKGET/SET go through cache but CONFIG_METAG_LNKGET_AROUND_CACHE=y\n");
|
|
#elif defined(CONFIG_METAG_ATOMICITY_LNKGET)
|
|
/*
|
|
* if the cache is wrong, LNKGET_AROUND_CACHE is really necessary
|
|
* because the kernel is configured to use LNKGET/SET for atomicity
|
|
*/
|
|
WARN(temp != LNKGET_CONSTANT,
|
|
"LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
|
|
"Expect kernel failure as it's used for atomicity primitives\n");
|
|
#elif defined(CONFIG_SMP)
|
|
/*
|
|
* if the cache is wrong, LNKGET_AROUND_CACHE should be used or the
|
|
* gateway page won't flush and userland could break.
|
|
*/
|
|
WARN(temp != LNKGET_CONSTANT,
|
|
"LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
|
|
"Expect userland failure as it's used for user gateway page\n");
|
|
#else
|
|
/*
|
|
* if the cache is wrong, LNKGET_AROUND_CACHE is set wrong, but it
|
|
* doesn't actually matter as it doesn't have any effect on !SMP &&
|
|
* !ATOMICITY_LNKGET.
|
|
*/
|
|
if (temp != LNKGET_CONSTANT)
|
|
pr_warn("LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n");
|
|
#endif
|
|
}
|
|
#endif /* !CONFIG_METAG_META12 */
|
|
|
|
/**
|
|
* metag_cache_probe() - Probe L1 cache configuration.
|
|
*
|
|
* Probe the L1 cache configuration to aid the L1 physical cache flushing
|
|
* functions.
|
|
*/
|
|
void __init metag_cache_probe(void)
|
|
{
|
|
#ifndef CONFIG_METAG_META12
|
|
int coreid = metag_in32(METAC_CORE_ID);
|
|
int config = metag_in32(METAC_CORE_CONFIG2);
|
|
int cfgcache = coreid & METAC_COREID_CFGCACHE_BITS;
|
|
|
|
if (cfgcache == METAC_COREID_CFGCACHE_TYPE0 ||
|
|
cfgcache == METAC_COREID_CFGCACHE_PRIVNOMMU) {
|
|
icache_sets_log2 = 1;
|
|
dcache_sets_log2 = 1;
|
|
}
|
|
|
|
/* For normal size caches, the smallest size is 4Kb.
|
|
For small caches, the smallest size is 64b */
|
|
icache_set_shift = (config & METAC_CORECFG2_ICSMALL_BIT)
|
|
? 6 : 12;
|
|
icache_set_shift += (config & METAC_CORE_C2ICSZ_BITS)
|
|
>> METAC_CORE_C2ICSZ_S;
|
|
icache_set_shift -= icache_sets_log2;
|
|
|
|
dcache_set_shift = (config & METAC_CORECFG2_DCSMALL_BIT)
|
|
? 6 : 12;
|
|
dcache_set_shift += (config & METAC_CORECFG2_DCSZ_BITS)
|
|
>> METAC_CORECFG2_DCSZ_S;
|
|
dcache_set_shift -= dcache_sets_log2;
|
|
|
|
metag_lnkget_probe();
|
|
#else
|
|
/* Extract cache sizes from global heap segment */
|
|
unsigned long val, u;
|
|
int width, shift, addend;
|
|
PTBISEG seg;
|
|
|
|
seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL,
|
|
TBID_SEGSCOPE_GLOBAL,
|
|
TBID_SEGTYPE_HEAP));
|
|
if (seg != NULL) {
|
|
val = seg->Data[1];
|
|
|
|
/* Work out width of I-cache size bit-field */
|
|
u = ((unsigned long) METAG_TBI_ICACHE_SIZE_BITS)
|
|
>> METAG_TBI_ICACHE_SIZE_S;
|
|
width = 0;
|
|
while (u & 1) {
|
|
width++;
|
|
u >>= 1;
|
|
}
|
|
/* Extract sign-extended size addend value */
|
|
shift = 32 - (METAG_TBI_ICACHE_SIZE_S + width);
|
|
addend = (long) ((val & METAG_TBI_ICACHE_SIZE_BITS)
|
|
<< shift)
|
|
>> (shift + METAG_TBI_ICACHE_SIZE_S);
|
|
/* Now calculate I-cache set size */
|
|
icache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
|
|
- DEFAULT_CACHE_WAYS_LOG2)
|
|
+ addend;
|
|
|
|
/* Similarly for D-cache */
|
|
u = ((unsigned long) METAG_TBI_DCACHE_SIZE_BITS)
|
|
>> METAG_TBI_DCACHE_SIZE_S;
|
|
width = 0;
|
|
while (u & 1) {
|
|
width++;
|
|
u >>= 1;
|
|
}
|
|
shift = 32 - (METAG_TBI_DCACHE_SIZE_S + width);
|
|
addend = (long) ((val & METAG_TBI_DCACHE_SIZE_BITS)
|
|
<< shift)
|
|
>> (shift + METAG_TBI_DCACHE_SIZE_S);
|
|
dcache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
|
|
- DEFAULT_CACHE_WAYS_LOG2)
|
|
+ addend;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void metag_phys_data_cache_flush(const void *start)
|
|
{
|
|
unsigned long flush0, flush1, flush2, flush3;
|
|
int loops, step;
|
|
int thread;
|
|
int part, offset;
|
|
int set_shift;
|
|
|
|
/* Use a sequence of writes to flush the cache region requested */
|
|
thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
|
|
>> TXENABLE_THREAD_S;
|
|
|
|
/* Cache is broken into sets which lie in contiguous RAMs */
|
|
set_shift = dcache_set_shift;
|
|
|
|
/* Move to the base of the physical cache flush region */
|
|
flush0 = LINSYSCFLUSH_DCACHE_LINE;
|
|
step = 64;
|
|
|
|
/* Get partition data for this thread */
|
|
part = metag_in32(SYSC_DCPART0 +
|
|
(SYSC_xCPARTn_STRIDE * thread));
|
|
|
|
if ((int)start < 0)
|
|
/* Access Global vs Local partition */
|
|
part >>= SYSC_xCPARTG_AND_S
|
|
- SYSC_xCPARTL_AND_S;
|
|
|
|
/* Extract offset and move SetOff */
|
|
offset = (part & SYSC_xCPARTL_OR_BITS)
|
|
>> SYSC_xCPARTL_OR_S;
|
|
flush0 += (offset << (set_shift - 4));
|
|
|
|
/* Shrink size */
|
|
part = (part & SYSC_xCPARTL_AND_BITS)
|
|
>> SYSC_xCPARTL_AND_S;
|
|
loops = ((part + 1) << (set_shift - 4));
|
|
|
|
/* Reduce loops by step of cache line size */
|
|
loops /= step;
|
|
|
|
flush1 = flush0 + (1 << set_shift);
|
|
flush2 = flush0 + (2 << set_shift);
|
|
flush3 = flush0 + (3 << set_shift);
|
|
|
|
if (dcache_sets_log2 == 1) {
|
|
flush2 = flush1;
|
|
flush3 = flush1 + step;
|
|
flush1 = flush0 + step;
|
|
step <<= 1;
|
|
loops >>= 1;
|
|
}
|
|
|
|
/* Clear loops ways in cache */
|
|
while (loops-- != 0) {
|
|
/* Clear the ways. */
|
|
#if 0
|
|
/*
|
|
* GCC doesn't generate very good code for this so we
|
|
* provide inline assembly instead.
|
|
*/
|
|
metag_out8(0, flush0);
|
|
metag_out8(0, flush1);
|
|
metag_out8(0, flush2);
|
|
metag_out8(0, flush3);
|
|
|
|
flush0 += step;
|
|
flush1 += step;
|
|
flush2 += step;
|
|
flush3 += step;
|
|
#else
|
|
asm volatile (
|
|
"SETB\t[%0+%4++],%5\n"
|
|
"SETB\t[%1+%4++],%5\n"
|
|
"SETB\t[%2+%4++],%5\n"
|
|
"SETB\t[%3+%4++],%5\n"
|
|
: "+e" (flush0),
|
|
"+e" (flush1),
|
|
"+e" (flush2),
|
|
"+e" (flush3)
|
|
: "e" (step), "a" (0));
|
|
#endif
|
|
}
|
|
}
|
|
|
|
void metag_data_cache_flush_all(const void *start)
|
|
{
|
|
if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
|
|
/* No need to flush the data cache it's not actually enabled */
|
|
return;
|
|
|
|
metag_phys_data_cache_flush(start);
|
|
}
|
|
|
|
void metag_data_cache_flush(const void *start, int bytes)
|
|
{
|
|
unsigned long flush0;
|
|
int loops, step;
|
|
|
|
if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
|
|
/* No need to flush the data cache it's not actually enabled */
|
|
return;
|
|
|
|
if (bytes >= 4096) {
|
|
metag_phys_data_cache_flush(start);
|
|
return;
|
|
}
|
|
|
|
/* Use linear cache flush mechanism on META IP */
|
|
flush0 = (int)start;
|
|
loops = ((int)start & (DCACHE_LINE_BYTES - 1)) + bytes +
|
|
(DCACHE_LINE_BYTES - 1);
|
|
loops >>= DCACHE_LINE_S;
|
|
|
|
#define PRIM_FLUSH(addr, offset) do { \
|
|
int __addr = ((int) (addr)) + ((offset) * 64); \
|
|
__builtin_dcache_flush((void *)(__addr)); \
|
|
} while (0)
|
|
|
|
#define LOOP_INC (4*64)
|
|
|
|
do {
|
|
/* By default stop */
|
|
step = 0;
|
|
|
|
switch (loops) {
|
|
/* Drop Thru Cases! */
|
|
default:
|
|
PRIM_FLUSH(flush0, 3);
|
|
loops -= 4;
|
|
step = 1;
|
|
case 3:
|
|
PRIM_FLUSH(flush0, 2);
|
|
case 2:
|
|
PRIM_FLUSH(flush0, 1);
|
|
case 1:
|
|
PRIM_FLUSH(flush0, 0);
|
|
flush0 += LOOP_INC;
|
|
case 0:
|
|
break;
|
|
}
|
|
} while (step);
|
|
}
|
|
EXPORT_SYMBOL(metag_data_cache_flush);
|
|
|
|
static void metag_phys_code_cache_flush(const void *start, int bytes)
|
|
{
|
|
unsigned long flush0, flush1, flush2, flush3, end_set;
|
|
int loops, step;
|
|
int thread;
|
|
int set_shift, set_size;
|
|
int part, offset;
|
|
|
|
/* Use a sequence of writes to flush the cache region requested */
|
|
thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
|
|
>> TXENABLE_THREAD_S;
|
|
set_shift = icache_set_shift;
|
|
|
|
/* Move to the base of the physical cache flush region */
|
|
flush0 = LINSYSCFLUSH_ICACHE_LINE;
|
|
step = 64;
|
|
|
|
/* Get partition code for this thread */
|
|
part = metag_in32(SYSC_ICPART0 +
|
|
(SYSC_xCPARTn_STRIDE * thread));
|
|
|
|
if ((int)start < 0)
|
|
/* Access Global vs Local partition */
|
|
part >>= SYSC_xCPARTG_AND_S-SYSC_xCPARTL_AND_S;
|
|
|
|
/* Extract offset and move SetOff */
|
|
offset = (part & SYSC_xCPARTL_OR_BITS)
|
|
>> SYSC_xCPARTL_OR_S;
|
|
flush0 += (offset << (set_shift - 4));
|
|
|
|
/* Shrink size */
|
|
part = (part & SYSC_xCPARTL_AND_BITS)
|
|
>> SYSC_xCPARTL_AND_S;
|
|
loops = ((part + 1) << (set_shift - 4));
|
|
|
|
/* Where does the Set end? */
|
|
end_set = flush0 + loops;
|
|
set_size = loops;
|
|
|
|
#ifdef CONFIG_METAG_META12
|
|
if ((bytes < 4096) && (bytes < loops)) {
|
|
/* Unreachable on HTP/MTP */
|
|
/* Only target the sets that could be relavent */
|
|
flush0 += (loops - step) & ((int) start);
|
|
loops = (((int) start) & (step-1)) + bytes + step - 1;
|
|
}
|
|
#endif
|
|
|
|
/* Reduce loops by step of cache line size */
|
|
loops /= step;
|
|
|
|
flush1 = flush0 + (1<<set_shift);
|
|
flush2 = flush0 + (2<<set_shift);
|
|
flush3 = flush0 + (3<<set_shift);
|
|
|
|
if (icache_sets_log2 == 1) {
|
|
flush2 = flush1;
|
|
flush3 = flush1 + step;
|
|
flush1 = flush0 + step;
|
|
#if 0
|
|
/* flush0 will stop one line early in this case
|
|
* (flush1 will do the final line).
|
|
* However we don't correct end_set here at the moment
|
|
* because it will never wrap on HTP/MTP
|
|
*/
|
|
end_set -= step;
|
|
#endif
|
|
step <<= 1;
|
|
loops >>= 1;
|
|
}
|
|
|
|
/* Clear loops ways in cache */
|
|
while (loops-- != 0) {
|
|
#if 0
|
|
/*
|
|
* GCC doesn't generate very good code for this so we
|
|
* provide inline assembly instead.
|
|
*/
|
|
/* Clear the ways */
|
|
metag_out8(0, flush0);
|
|
metag_out8(0, flush1);
|
|
metag_out8(0, flush2);
|
|
metag_out8(0, flush3);
|
|
|
|
flush0 += step;
|
|
flush1 += step;
|
|
flush2 += step;
|
|
flush3 += step;
|
|
#else
|
|
asm volatile (
|
|
"SETB\t[%0+%4++],%5\n"
|
|
"SETB\t[%1+%4++],%5\n"
|
|
"SETB\t[%2+%4++],%5\n"
|
|
"SETB\t[%3+%4++],%5\n"
|
|
: "+e" (flush0),
|
|
"+e" (flush1),
|
|
"+e" (flush2),
|
|
"+e" (flush3)
|
|
: "e" (step), "a" (0));
|
|
#endif
|
|
|
|
if (flush0 == end_set) {
|
|
/* Wrap within Set 0 */
|
|
flush0 -= set_size;
|
|
flush1 -= set_size;
|
|
flush2 -= set_size;
|
|
flush3 -= set_size;
|
|
}
|
|
}
|
|
}
|
|
|
|
void metag_code_cache_flush_all(const void *start)
|
|
{
|
|
if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
|
|
/* No need to flush the code cache it's not actually enabled */
|
|
return;
|
|
|
|
metag_phys_code_cache_flush(start, 4096);
|
|
}
|
|
EXPORT_SYMBOL(metag_code_cache_flush_all);
|
|
|
|
void metag_code_cache_flush(const void *start, int bytes)
|
|
{
|
|
#ifndef CONFIG_METAG_META12
|
|
void *flush;
|
|
int loops, step;
|
|
#endif /* !CONFIG_METAG_META12 */
|
|
|
|
if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
|
|
/* No need to flush the code cache it's not actually enabled */
|
|
return;
|
|
|
|
#ifdef CONFIG_METAG_META12
|
|
/* CACHEWD isn't available on Meta1, so always do full cache flush */
|
|
metag_phys_code_cache_flush(start, bytes);
|
|
|
|
#else /* CONFIG_METAG_META12 */
|
|
/* If large size do full physical cache flush */
|
|
if (bytes >= 4096) {
|
|
metag_phys_code_cache_flush(start, bytes);
|
|
return;
|
|
}
|
|
|
|
/* Use linear cache flush mechanism on META IP */
|
|
flush = (void *)((int)start & ~(ICACHE_LINE_BYTES-1));
|
|
loops = ((int)start & (ICACHE_LINE_BYTES-1)) + bytes +
|
|
(ICACHE_LINE_BYTES-1);
|
|
loops >>= ICACHE_LINE_S;
|
|
|
|
#define PRIM_IFLUSH(addr, offset) \
|
|
__builtin_meta2_cachewd(((addr) + ((offset) * 64)), CACHEW_ICACHE_BIT)
|
|
|
|
#define LOOP_INC (4*64)
|
|
|
|
do {
|
|
/* By default stop */
|
|
step = 0;
|
|
|
|
switch (loops) {
|
|
/* Drop Thru Cases! */
|
|
default:
|
|
PRIM_IFLUSH(flush, 3);
|
|
loops -= 4;
|
|
step = 1;
|
|
case 3:
|
|
PRIM_IFLUSH(flush, 2);
|
|
case 2:
|
|
PRIM_IFLUSH(flush, 1);
|
|
case 1:
|
|
PRIM_IFLUSH(flush, 0);
|
|
flush += LOOP_INC;
|
|
case 0:
|
|
break;
|
|
}
|
|
} while (step);
|
|
#endif /* !CONFIG_METAG_META12 */
|
|
}
|
|
EXPORT_SYMBOL(metag_code_cache_flush);
|