From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Mon, 11 Apr 2016 10:41:07 +0300 Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and rt patch from the rt wiki download page. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen --- kernel/arch/arc/include/asm/Kbuild | 4 +- kernel/arch/arc/include/asm/arcregs.h | 76 +++- kernel/arch/arc/include/asm/atomic.h | 88 +++- kernel/arch/arc/include/asm/barrier.h | 48 +++ kernel/arch/arc/include/asm/bitops.h | 71 ++++ kernel/arch/arc/include/asm/cache.h | 28 +- kernel/arch/arc/include/asm/cacheflush.h | 10 +- kernel/arch/arc/include/asm/cmpxchg.h | 22 +- kernel/arch/arc/include/asm/delay.h | 9 +- kernel/arch/arc/include/asm/dma-mapping.h | 43 +- kernel/arch/arc/include/asm/elf.h | 5 + kernel/arch/arc/include/asm/entry-arcv2.h | 190 +++++++++ kernel/arch/arc/include/asm/entry-compact.h | 306 ++++++++++++++ kernel/arch/arc/include/asm/entry.h | 378 +---------------- kernel/arch/arc/include/asm/futex.h | 104 +++-- kernel/arch/arc/include/asm/highmem.h | 61 +++ kernel/arch/arc/include/asm/hugepage.h | 81 ++++ kernel/arch/arc/include/asm/io.h | 43 +- kernel/arch/arc/include/asm/irq.h | 7 + kernel/arch/arc/include/asm/irqflags-arcv2.h | 138 +++++++ kernel/arch/arc/include/asm/irqflags-compact.h | 199 +++++++++ kernel/arch/arc/include/asm/irqflags.h | 168 +------- kernel/arch/arc/include/asm/kmap_types.h | 18 + kernel/arch/arc/include/asm/mach_desc.h | 10 +- kernel/arch/arc/include/asm/mcip.h | 91 +++++ kernel/arch/arc/include/asm/mmu.h | 31 +- kernel/arch/arc/include/asm/page.h | 9 +- kernel/arch/arc/include/asm/perf_event.h | 23 +- kernel/arch/arc/include/asm/pgalloc.h | 12 +- kernel/arch/arc/include/asm/pgtable.h | 96 +++-- kernel/arch/arc/include/asm/processor.h | 44 +- kernel/arch/arc/include/asm/ptrace.h | 71 +++- kernel/arch/arc/include/asm/setup.h | 7 + kernel/arch/arc/include/asm/smp.h | 7 + kernel/arch/arc/include/asm/spinlock.h | 538 ++++++++++++++++++++++++- kernel/arch/arc/include/asm/spinlock_types.h | 2 + kernel/arch/arc/include/asm/thread_info.h | 1 + kernel/arch/arc/include/asm/tlbflush.h | 5 + kernel/arch/arc/include/asm/uaccess.h | 17 +- kernel/arch/arc/include/asm/unwind.h | 4 - 40 files changed, 2286 insertions(+), 779 deletions(-) create mode 100644 kernel/arch/arc/include/asm/barrier.h create mode 100644 kernel/arch/arc/include/asm/entry-arcv2.h create mode 100644 kernel/arch/arc/include/asm/entry-compact.h create mode 100644 kernel/arch/arc/include/asm/highmem.h create mode 100644 kernel/arch/arc/include/asm/hugepage.h create mode 100644 kernel/arch/arc/include/asm/irqflags-arcv2.h create mode 100644 kernel/arch/arc/include/asm/irqflags-compact.h create mode 100644 kernel/arch/arc/include/asm/kmap_types.h create mode 100644 kernel/arch/arc/include/asm/mcip.h (limited to 'kernel/arch/arc/include/asm') diff --git a/kernel/arch/arc/include/asm/Kbuild b/kernel/arch/arc/include/asm/Kbuild index be0c39e76..0b10ef2a4 100644 --- a/kernel/arch/arc/include/asm/Kbuild +++ b/kernel/arch/arc/include/asm/Kbuild @@ -1,5 +1,4 @@ generic-y += auxvec.h -generic-y += barrier.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += clkdev.h @@ -23,6 +22,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h generic-y += param.h @@ -33,7 +33,6 @@ generic-y += poll.h generic-y += posix_types.h generic-y += preempt.h generic-y += resource.h -generic-y += scatterlist.h generic-y += sembuf.h generic-y += shmbuf.h generic-y += siginfo.h @@ -49,4 +48,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/kernel/arch/arc/include/asm/arcregs.h b/kernel/arch/arc/include/asm/arcregs.h index e2b1b1211..7fac7d85e 100644 --- a/kernel/arch/arc/include/asm/arcregs.h +++ b/kernel/arch/arc/include/asm/arcregs.h @@ -16,6 +16,8 @@ #define ARC_REG_PERIBASE_BCR 0x69 #define ARC_REG_FP_BCR 0x6B /* ARCompact: Single-Precision FPU */ #define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */ +#define ARC_REG_FP_V2_BCR 0xc8 /* ARCv2 FPU */ +#define ARC_REG_SLC_BCR 0xce #define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */ #define ARC_REG_TIMERS_BCR 0x75 #define ARC_REG_AP_BCR 0x76 @@ -31,7 +33,9 @@ #define ARC_REG_BPU_BCR 0xc0 #define ARC_REG_ISA_CFG_BCR 0xc1 #define ARC_REG_RTT_BCR 0xF2 +#define ARC_REG_IRQ_BCR 0xF3 #define ARC_REG_SMART_BCR 0xFF +#define ARC_REG_CLUSTER_BCR 0xcf /* status32 Bits Positions */ #define STATUS_AE_BIT 5 /* Exception active */ @@ -51,6 +55,7 @@ * [15: 8] = Exception Cause Code * [ 7: 0] = Exception Parameters (for certain types only) */ +#ifdef CONFIG_ISA_ARCOMPACT #define ECR_V_MEM_ERR 0x01 #define ECR_V_INSN_ERR 0x02 #define ECR_V_MACH_CHK 0x20 @@ -58,6 +63,15 @@ #define ECR_V_DTLB_MISS 0x22 #define ECR_V_PROTV 0x23 #define ECR_V_TRAP 0x25 +#else +#define ECR_V_MEM_ERR 0x01 +#define ECR_V_INSN_ERR 0x02 +#define ECR_V_MACH_CHK 0x03 +#define ECR_V_ITLB_MISS 0x04 +#define ECR_V_DTLB_MISS 0x05 +#define ECR_V_PROTV 0x06 +#define ECR_V_TRAP 0x09 +#endif /* DTLB Miss and Protection Violation Cause Codes */ @@ -76,14 +90,10 @@ #define ECR_C_BIT_DTLB_LD_MISS 8 #define ECR_C_BIT_DTLB_ST_MISS 9 -/* Dummy ECR values for Interrupts */ -#define event_IRQ1 0x0031abcd -#define event_IRQ2 0x0032abcd - /* Auxiliary registers */ #define AUX_IDENTITY 4 #define AUX_INTR_VEC_BASE 0x25 - +#define AUX_NON_VOL 0x5e /* * Floating Pt Registers @@ -110,7 +120,7 @@ /* gcc builtin sr needs reg param to be long immediate */ #define write_aux_reg(reg_immed, val) \ - __builtin_arc_sr((unsigned int)val, reg_immed) + __builtin_arc_sr((unsigned int)(val), reg_immed) #else @@ -204,9 +214,11 @@ struct bcr_identity { struct bcr_isa { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad1:23, atomic1:1, ver:8; + unsigned int div_rem:4, pad2:4, ldd:1, unalign:1, atomic:1, be:1, + pad1:11, atomic1:1, ver:8; #else - unsigned int ver:8, atomic1:1, pad1:23; + unsigned int ver:8, atomic1:1, pad1:11, be:1, atomic:1, unalign:1, + ldd:1, pad2:4, div_rem:4; #endif }; @@ -228,9 +240,9 @@ struct bcr_extn_xymem { struct bcr_perip { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int start:8, pad2:8, sz:8, pad:8; + unsigned int start:8, pad2:8, sz:8, ver:8; #else - unsigned int pad:8, sz:8, pad2:8, start:8; + unsigned int ver:8, sz:8, pad2:8, start:8; #endif }; @@ -269,11 +281,19 @@ struct bcr_fp_arcompact { #endif }; +struct bcr_fp_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad2:15, dp:1, pad1:7, sp:1, ver:8; +#else + unsigned int ver:8, sp:1, pad1:7, dp:1, pad2:15; +#endif +}; + struct bcr_timer { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad2:15, rtsc:1, pad1:6, t1:1, t0:1, ver:8; + unsigned int pad2:15, rtsc:1, pad1:5, rtc:1, t1:1, t0:1, ver:8; #else - unsigned int ver:8, t0:1, t1:1, pad1:6, rtsc:1, pad2:15; + unsigned int ver:8, t0:1, t1:1, rtc:1, pad1:5, rtsc:1, pad2:15; #endif }; @@ -285,6 +305,14 @@ struct bcr_bpu_arcompact { #endif }; +struct bcr_bpu_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:6, fbe:2, tqe:2, ts:4, ft:1, rse:2, pte:3, bce:3, ver:8; +#else + unsigned int ver:8, bce:3, pte:3, rse:2, ft:1, ts:4, tqe:2, fbe:2, pad:6; +#endif +}; + struct bcr_generic { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad:24, ver:8; @@ -299,11 +327,12 @@ struct bcr_generic { */ struct cpuinfo_arc_mmu { - unsigned int ver, pg_sz, sets, ways, u_dtlb, u_itlb, num_tlb; + unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1; + unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8; }; struct cpuinfo_arc_cache { - unsigned int sz_k:8, line_len:8, assoc:4, ver:4, alias:1, vipt:1, pad:6; + unsigned int sz_k:14, line_len:8, assoc:4, ver:4, alias:1, vipt:1; }; struct cpuinfo_arc_bpu { @@ -315,14 +344,13 @@ struct cpuinfo_arc_ccm { }; struct cpuinfo_arc { - struct cpuinfo_arc_cache icache, dcache; + struct cpuinfo_arc_cache icache, dcache, slc; struct cpuinfo_arc_mmu mmu; struct cpuinfo_arc_bpu bpu; struct bcr_identity core; struct bcr_isa isa; struct bcr_timer timers; unsigned int vec_base; - unsigned int uncached_base; struct cpuinfo_arc_ccm iccm, dccm; struct { unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3, @@ -336,6 +364,22 @@ struct cpuinfo_arc { extern struct cpuinfo_arc cpuinfo_arc700[]; +static inline int is_isa_arcv2(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCV2); +} + +static inline int is_isa_arcompact(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCOMPACT); +} + +#if defined(CONFIG_ISA_ARCOMPACT) && !defined(_CPU_DEFAULT_A7) +#error "Toolchain not configured for ARCompact builds" +#elif defined(CONFIG_ISA_ARCV2) && !defined(_CPU_DEFAULT_HS) +#error "Toolchain not configured for ARCv2 builds" +#endif + #endif /* __ASEMBLY__ */ #endif /* _ASM_ARC_ARCREGS_H */ diff --git a/kernel/arch/arc/include/asm/atomic.h b/kernel/arch/arc/include/asm/atomic.h index 20b7dc179..7730d302c 100644 --- a/kernel/arch/arc/include/asm/atomic.h +++ b/kernel/arch/arc/include/asm/atomic.h @@ -17,31 +17,66 @@ #include #include -#define atomic_read(v) ((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) #ifdef CONFIG_ARC_HAS_LLSC -#define atomic_set(v, i) (((v)->counter) = (i)) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) + +#ifdef CONFIG_ARC_STAR_9000923308 + +#define SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int delay = 1, tmp; \ + +#define SCOND_FAIL_RETRY_ASM \ + " bz 4f \n" \ + " ; --- scond fail delay --- \n" \ + " mov %[tmp], %[delay] \n" /* tmp = delay */ \ + "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ + " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ + " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " b 1b \n" /* start over */ \ + "4: ; --- success --- \n" \ + +#define SCOND_FAIL_RETRY_VARS \ + ,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \ + +#else /* !CONFIG_ARC_STAR_9000923308 */ + +#define SCOND_FAIL_RETRY_VAR_DEF + +#define SCOND_FAIL_RETRY_ASM \ + " bnz 1b \n" \ + +#define SCOND_FAIL_RETRY_VARS + +#endif #define ATOMIC_OP(op, c_op, asm_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - unsigned int temp; \ + unsigned int val; \ + SCOND_FAIL_RETRY_VAR_DEF \ \ __asm__ __volatile__( \ - "1: llock %0, [%1] \n" \ - " " #asm_op " %0, %0, %2 \n" \ - " scond %0, [%1] \n" \ - " bnz 1b \n" \ - : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \ - : "r"(&v->counter), "ir"(i) \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + SCOND_FAIL_RETRY_ASM \ + \ + : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ + SCOND_FAIL_RETRY_VARS \ + : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ + [i] "ir" (i) \ : "cc"); \ } \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ - unsigned int temp; \ + unsigned int val; \ + SCOND_FAIL_RETRY_VAR_DEF \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -50,17 +85,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ smp_mb(); \ \ __asm__ __volatile__( \ - "1: llock %0, [%1] \n" \ - " " #asm_op " %0, %0, %2 \n" \ - " scond %0, [%1] \n" \ - " bnz 1b \n" \ - : "=&r"(temp) \ - : "r"(&v->counter), "ir"(i) \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + SCOND_FAIL_RETRY_ASM \ + \ + : [val] "=&r" (val) \ + SCOND_FAIL_RETRY_VARS \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ : "cc"); \ \ smp_mb(); \ \ - return temp; \ + return val; \ } #else /* !CONFIG_ARC_HAS_LLSC */ @@ -68,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ #ifndef CONFIG_SMP /* violating atomic_xxx API locking protocol in UP for optimization sake */ -#define atomic_set(v, i) (((v)->counter) = (i)) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) #else @@ -86,7 +125,7 @@ static inline void atomic_set(atomic_t *v, int i) unsigned long flags; atomic_ops_lock(flags); - v->counter = i; + WRITE_ONCE(v->counter, i); atomic_ops_unlock(flags); } @@ -133,13 +172,20 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -ATOMIC_OP(and, &=, and) -#define atomic_clear_mask(mask, v) atomic_and(~(mask), (v)) +#define atomic_andnot atomic_andnot + +ATOMIC_OP(and, &=, and) +ATOMIC_OP(andnot, &= ~, bic) +ATOMIC_OP(or, |=, or) +ATOMIC_OP(xor, ^=, xor) #undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP +#undef SCOND_FAIL_RETRY_VAR_DEF +#undef SCOND_FAIL_RETRY_ASM +#undef SCOND_FAIL_RETRY_VARS /** * __atomic_add_unless - add unless the number is a given value diff --git a/kernel/arch/arc/include/asm/barrier.h b/kernel/arch/arc/include/asm/barrier.h new file mode 100644 index 000000000..a7209983e --- /dev/null +++ b/kernel/arch/arc/include/asm/barrier.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#ifdef CONFIG_ISA_ARCV2 + +/* + * ARCv2 based HS38 cores are in-order issue, but still weakly ordered + * due to micro-arch buffering/queuing of load/store, cache hit vs. miss ... + * + * Explicit barrier provided by DMB instruction + * - Operand supports fine grained load/store/load+store semantics + * - Ensures that selected memory operation issued before it will complete + * before any subsequent memory operation of same type + * - DMB guarantees SMP as well as local barrier semantics + * (asm-generic/barrier.h ensures sane smp_*mb if not defined here, i.e. + * UP: barrier(), SMP: smp_*mb == *mb) + * - DSYNC provides DMB+completion_of_cache_bpu_maintenance_ops hence not needed + * in the general case. Plus it only provides full barrier. + */ + +#define mb() asm volatile("dmb 3\n" : : : "memory") +#define rmb() asm volatile("dmb 1\n" : : : "memory") +#define wmb() asm volatile("dmb 2\n" : : : "memory") + +#endif + +#ifdef CONFIG_ISA_ARCOMPACT + +/* + * ARCompact based cores (ARC700) only have SYNC instruction which is super + * heavy weight as it flushes the pipeline as well. + * There are no real SMP implementations of such cores. + */ + +#define mb() asm volatile("sync\n" : : : "memory") +#endif + +#include + +#endif diff --git a/kernel/arch/arc/include/asm/bitops.h b/kernel/arch/arc/include/asm/bitops.h index dae03e66f..57c1f3384 100644 --- a/kernel/arch/arc/include/asm/bitops.h +++ b/kernel/arch/arc/include/asm/bitops.h @@ -215,6 +215,8 @@ test_bit(unsigned int nr, const volatile unsigned long *addr) return ((mask & *addr) != 0); } +#ifdef CONFIG_ISA_ARCOMPACT + /* * Count the number of zeros, starting from MSB * Helper for fls( ) friends @@ -307,6 +309,75 @@ static inline __attribute__ ((const)) int __ffs(unsigned long word) return ffs(word) - 1; } +#else /* CONFIG_ISA_ARCV2 */ + +/* + * fls = Find Last Set in word + * @result: [1-32] + * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0 + */ +static inline __attribute__ ((const)) int fls(unsigned long x) +{ + int n; + + asm volatile( + " fls.f %0, %1 \n" /* 0:31; 0(Z) if src 0 */ + " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */ + : "=r"(n) /* Early clobber not needed */ + : "r"(x) + : "cc"); + + return n; +} + +/* + * __fls: Similar to fls, but zero based (0-31). Also 0 if no bit set + */ +static inline __attribute__ ((const)) int __fls(unsigned long x) +{ + /* FLS insn has exactly same semantics as the API */ + return __builtin_arc_fls(x); +} + +/* + * ffs = Find First Set in word (LSB to MSB) + * @result: [1-32], 0 if all 0's + */ +static inline __attribute__ ((const)) int ffs(unsigned long x) +{ + int n; + + asm volatile( + " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */ + " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */ + " mov.z %0, 0 \n" /* 31(Z)-> 0 */ + : "=r"(n) /* Early clobber not needed */ + : "r"(x) + : "cc"); + + return n; +} + +/* + * __ffs: Similar to ffs, but zero based (0-31) + */ +static inline __attribute__ ((const)) int __ffs(unsigned long x) +{ + int n; + + asm volatile( + " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */ + " mov.z %0, 0 \n" /* 31(Z)-> 0 */ + : "=r"(n) + : "r"(x) + : "cc"); + + return n; + +} + +#endif /* CONFIG_ISA_ARCOMPACT */ + /* * ffz = Find First Zero in word. * @return:[0-31], 32 if all 1's diff --git a/kernel/arch/arc/include/asm/cache.h b/kernel/arch/arc/include/asm/cache.h index 7861255da..210ef3e72 100644 --- a/kernel/arch/arc/include/asm/cache.h +++ b/kernel/arch/arc/include/asm/cache.h @@ -53,6 +53,8 @@ extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); extern void read_decode_cache_bcr(void); +extern int ioc_exists; + #endif /* !__ASSEMBLY__ */ /* Instruction cache related Auxiliary registers */ @@ -60,9 +62,8 @@ extern void read_decode_cache_bcr(void); #define ARC_REG_IC_IVIC 0x10 #define ARC_REG_IC_CTRL 0x11 #define ARC_REG_IC_IVIL 0x19 -#if defined(CONFIG_ARC_MMU_V3) #define ARC_REG_IC_PTAG 0x1E -#endif +#define ARC_REG_IC_PTAG_HI 0x1F /* Bit val in IC_CTRL */ #define IC_CTRL_CACHE_DISABLE 0x1 @@ -74,12 +75,31 @@ extern void read_decode_cache_bcr(void); #define ARC_REG_DC_IVDL 0x4A #define ARC_REG_DC_FLSH 0x4B #define ARC_REG_DC_FLDL 0x4C -#if defined(CONFIG_ARC_MMU_V3) #define ARC_REG_DC_PTAG 0x5C -#endif +#define ARC_REG_DC_PTAG_HI 0x5F /* Bit val in DC_CTRL */ #define DC_CTRL_INV_MODE_FLUSH 0x40 #define DC_CTRL_FLUSH_STATUS 0x100 +/*System-level cache (L2 cache) related Auxiliary registers */ +#define ARC_REG_SLC_CFG 0x901 +#define ARC_REG_SLC_CTRL 0x903 +#define ARC_REG_SLC_FLUSH 0x904 +#define ARC_REG_SLC_INVALIDATE 0x905 +#define ARC_REG_SLC_RGN_START 0x914 +#define ARC_REG_SLC_RGN_END 0x916 + +/* Bit val in SLC_CONTROL */ +#define SLC_CTRL_IM 0x040 +#define SLC_CTRL_DISABLE 0x001 +#define SLC_CTRL_BUSY 0x100 +#define SLC_CTRL_RGN_OP_INV 0x200 + +/* IO coherency related Auxiliary registers */ +#define ARC_REG_IO_COH_ENABLE 0x500 +#define ARC_REG_IO_COH_PARTIAL 0x501 +#define ARC_REG_IO_COH_AP0_BASE 0x508 +#define ARC_REG_IO_COH_AP0_SIZE 0x509 + #endif /* _ASM_CACHE_H */ diff --git a/kernel/arch/arc/include/asm/cacheflush.h b/kernel/arch/arc/include/asm/cacheflush.h index 6abc4972b..fbe3587c4 100644 --- a/kernel/arch/arc/include/asm/cacheflush.h +++ b/kernel/arch/arc/include/asm/cacheflush.h @@ -31,12 +31,10 @@ void flush_cache_all(void); -void flush_icache_range(unsigned long start, unsigned long end); -void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len); -void __inv_icache_page(unsigned long paddr, unsigned long vaddr); -void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr); -#define __flush_dcache_page(p, v) \ - ___flush_dcache_page((unsigned long)p, (unsigned long)v) +void flush_icache_range(unsigned long kstart, unsigned long kend); +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len); +void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr); +void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 diff --git a/kernel/arch/arc/include/asm/cmpxchg.h b/kernel/arch/arc/include/asm/cmpxchg.h index 44fd531f4..af7a2db13 100644 --- a/kernel/arch/arc/include/asm/cmpxchg.h +++ b/kernel/arch/arc/include/asm/cmpxchg.h @@ -110,18 +110,18 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, sizeof(*(ptr)))) /* - * On ARC700, EX insn is inherently atomic, so by default "vanilla" xchg() need - * not require any locking. However there's a quirk. - * ARC lacks native CMPXCHG, thus emulated (see above), using external locking - - * incidently it "reuses" the same atomic_ops_lock used by atomic APIs. - * Now, llist code uses cmpxchg() and xchg() on same data, so xchg() needs to - * abide by same serializing rules, thus ends up using atomic_ops_lock as well. + * xchg() maps directly to ARC EX instruction which guarantees atomicity. + * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock + * due to a subtle reason: + * - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot + * of kernel code which calls xchg()/cmpxchg() on same data (see llist.h) + * Hence xchg() needs to follow same locking rules. * - * This however is only relevant if SMP and/or ARC lacks LLSC - * if (UP or LLSC) - * xchg doesn't need serialization - * else <==> !(UP or LLSC) <==> (!UP and !LLSC) <==> (SMP and !LLSC) - * xchg needs serialization + * Technically the lock is also needed for UP (boils down to irq save/restore) + * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to + * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg() + * Other way around, xchg is one instruction anyways, so can't be interrupted + * as such */ #if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP) diff --git a/kernel/arch/arc/include/asm/delay.h b/kernel/arch/arc/include/asm/delay.h index 43de30256..08e7e2a16 100644 --- a/kernel/arch/arc/include/asm/delay.h +++ b/kernel/arch/arc/include/asm/delay.h @@ -22,11 +22,10 @@ static inline void __delay(unsigned long loops) { __asm__ __volatile__( - "1: sub.f %0, %0, 1 \n" - " jpnz 1b \n" - : "+r"(loops) - : - : "cc"); + " lp 1f \n" + " nop \n" + "1: \n" + : "+l"(loops)); } extern void __bad_udelay(void); diff --git a/kernel/arch/arc/include/asm/dma-mapping.h b/kernel/arch/arc/include/asm/dma-mapping.h index 45b8e0cea..2d28ba939 100644 --- a/kernel/arch/arc/include/asm/dma-mapping.h +++ b/kernel/arch/arc/include/asm/dma-mapping.h @@ -14,23 +14,6 @@ #include #include -#ifndef CONFIG_ARC_PLAT_NEEDS_CPU_TO_DMA -/* - * dma_map_* API take cpu addresses, which is kernel logical address in the - * untranslated address space (0x8000_0000) based. The dma address (bus addr) - * ideally needs to be 0x0000_0000 based hence these glue routines. - * However given that intermediate bus bridges can ignore the high bit, we can - * do with these routines being no-ops. - * If a platform/device comes up which sriclty requires 0 based bus addr - * (e.g. AHB-PCI bridge on Angel4 board), then it can provide it's own versions - */ -#define plat_dma_addr_to_kernel(dev, addr) ((unsigned long)(addr)) -#define plat_kernel_addr_to_dma(dev, ptr) ((dma_addr_t)(ptr)) - -#else -#include -#endif - void *dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); @@ -94,7 +77,7 @@ dma_map_single(struct device *dev, void *cpu_addr, size_t size, enum dma_data_direction dir) { _dma_cache_sync((unsigned long)cpu_addr, size, dir); - return plat_kernel_addr_to_dma(dev, cpu_addr); + return (dma_addr_t)cpu_addr; } static inline void @@ -147,16 +130,14 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size, - DMA_FROM_DEVICE); + _dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE); } static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size, - DMA_TO_DEVICE); + _dma_cache_sync(dma_handle, size, DMA_TO_DEVICE); } static inline void @@ -164,8 +145,7 @@ dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction direction) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset, - size, DMA_FROM_DEVICE); + _dma_cache_sync(dma_handle + offset, size, DMA_FROM_DEVICE); } static inline void @@ -173,27 +153,28 @@ dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction direction) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset, - size, DMA_TO_DEVICE); + _dma_cache_sync(dma_handle + offset, size, DMA_TO_DEVICE); } static inline void -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, +dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir) { int i; + struct scatterlist *sg; - for (i = 0; i < nelems; i++, sg++) + for_each_sg(sglist, sg, nelems, i) _dma_cache_sync((unsigned int)sg_virt(sg), sg->length, dir); } static inline void -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction dir) +dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction dir) { int i; + struct scatterlist *sg; - for (i = 0; i < nelems; i++, sg++) + for_each_sg(sglist, sg, nelems, i) _dma_cache_sync((unsigned int)sg_virt(sg), sg->length, dir); } diff --git a/kernel/arch/arc/include/asm/elf.h b/kernel/arch/arc/include/asm/elf.h index a26282857..51a99e25f 100644 --- a/kernel/arch/arc/include/asm/elf.h +++ b/kernel/arch/arc/include/asm/elf.h @@ -15,6 +15,11 @@ /* These ELF defines belong to uapi but libc elf.h already defines them */ #define EM_ARCOMPACT 93 +#define EM_ARCV2 195 /* ARCv2 Cores */ + +#define EM_ARC_INUSE (IS_ENABLED(CONFIG_ISA_ARCOMPACT) ? \ + EM_ARCOMPACT : EM_ARCV2) + /* ARC Relocations (kernel Modules only) */ #define R_ARC_32 0x4 #define R_ARC_32_ME 0x1B diff --git a/kernel/arch/arc/include/asm/entry-arcv2.h b/kernel/arch/arc/include/asm/entry-arcv2.h new file mode 100644 index 000000000..b5ff87e6f --- /dev/null +++ b/kernel/arch/arc/include/asm/entry-arcv2.h @@ -0,0 +1,190 @@ + +#ifndef __ASM_ARC_ENTRY_ARCV2_H +#define __ASM_ARC_ENTRY_ARCV2_H + +#include +#include +#include /* For THREAD_SIZE */ + +/*------------------------------------------------------------------------*/ +.macro INTERRUPT_PROLOGUE called_from + + ; Before jumping to Interrupt Vector, hardware micro-ops did following: + ; 1. SP auto-switched to kernel mode stack + ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1, K:0) + ; 3. Auto saved: r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI, PC, STAT32 + ; + ; Now manually save: r12, sp, fp, gp, r25 + + PUSH r12 + + ; Saving pt_regs->sp correctly requires some extra work due to the way + ; Auto stack switch works + ; - U mode: retrieve it from AUX_USER_SP + ; - K mode: add the offset from current SP where H/w starts auto push + ; + ; Utilize the fact that Z bit is set if Intr taken in U mode + mov.nz r9, sp + add.nz r9, r9, SZ_PT_REGS - PT_sp - 4 + bnz 1f + + lr r9, [AUX_USER_SP] +1: + PUSH r9 ; SP + + PUSH fp + PUSH gp + +#ifdef CONFIG_ARC_CURR_IN_REG + PUSH r25 ; user_r25 + GET_CURR_TASK_ON_CPU r25 +#else + sub sp, sp, 4 +#endif + +.ifnc \called_from, exception + sub sp, sp, 12 ; BTA/ECR/orig_r0 placeholder per pt_regs +.endif + +.endm + +/*------------------------------------------------------------------------*/ +.macro INTERRUPT_EPILOGUE called_from + +.ifnc \called_from, exception + add sp, sp, 12 ; skip BTA/ECR/orig_r0 placeholderss +.endif + +#ifdef CONFIG_ARC_CURR_IN_REG + POP r25 +#else + add sp, sp, 4 +#endif + + POP gp + POP fp + + ; Don't touch AUX_USER_SP if returning to K mode (Z bit set) + ; (Z bit set on K mode is inverse of INTERRUPT_PROLOGUE) + add.z sp, sp, 4 + bz 1f + + POPAX AUX_USER_SP +1: + POP r12 + +.endm + +/*------------------------------------------------------------------------*/ +.macro EXCEPTION_PROLOGUE + + ; Before jumping to Exception Vector, hardware micro-ops did following: + ; 1. SP auto-switched to kernel mode stack + ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1,K:0) + ; + ; Now manually save the complete reg file + + PUSH r9 ; freeup a register: slot of erstatus + + PUSHAX eret + sub sp, sp, 12 ; skip JLI, LDI, EI + PUSH lp_count + PUSHAX lp_start + PUSHAX lp_end + PUSH blink + + PUSH r11 + PUSH r10 + + ld.as r9, [sp, 10] ; load stashed r9 (status32 stack slot) + lr r10, [erstatus] + st.as r10, [sp, 10] ; save status32 at it's right stack slot + + PUSH r9 + PUSH r8 + PUSH r7 + PUSH r6 + PUSH r5 + PUSH r4 + PUSH r3 + PUSH r2 + PUSH r1 + PUSH r0 + + ; -- for interrupts, regs above are auto-saved by h/w in that order -- + ; Now do what ISR prologue does (manually save r12, sp, fp, gp, r25) + ; + ; Set Z flag if this was from U mode (expected by INTERRUPT_PROLOGUE) + ; Although H/w exception micro-ops do set Z flag for U mode (just like + ; for interrupts), it could get clobbered in case we soft land here from + ; a TLB Miss exception handler (tlbex.S) + + and r10, r10, STATUS_U_MASK + xor.f 0, r10, STATUS_U_MASK + + INTERRUPT_PROLOGUE exception + + PUSHAX erbta + PUSHAX ecr ; r9 contains ECR, expected by EV_Trap + + PUSH r0 ; orig_r0 +.endm + +/*------------------------------------------------------------------------*/ +.macro EXCEPTION_EPILOGUE + + ; Assumes r0 has PT_status32 + btst r0, STATUS_U_BIT ; Z flag set if K, used in INTERRUPT_EPILOGUE + + add sp, sp, 8 ; orig_r0/ECR don't need restoring + POPAX erbta + + INTERRUPT_EPILOGUE exception + + POP r0 + POP r1 + POP r2 + POP r3 + POP r4 + POP r5 + POP r6 + POP r7 + POP r8 + POP r9 + POP r10 + POP r11 + + POP blink + POPAX lp_end + POPAX lp_start + + POP r9 + mov lp_count, r9 + + add sp, sp, 12 ; skip JLI, LDI, EI + POPAX eret + POPAX erstatus + + ld.as r9, [sp, -12] ; reload r9 which got clobbered +.endm + +.macro FAKE_RET_FROM_EXCPN + lr r9, [status32] + bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK) + or r9, r9, (STATUS_L_MASK|STATUS_IE_MASK) + kflag r9 +.endm + +/* Get thread_info of "current" tsk */ +.macro GET_CURR_THR_INFO_FROM_SP reg + bmskn \reg, sp, THREAD_SHIFT - 1 +.endm + +/* Get CPU-ID of this core */ +.macro GET_CPU_ID reg + lr \reg, [identity] + xbfu \reg, \reg, 0xE8 /* 00111 01000 */ + /* M = 8-1 N = 8 */ +.endm + +#endif diff --git a/kernel/arch/arc/include/asm/entry-compact.h b/kernel/arch/arc/include/asm/entry-compact.h new file mode 100644 index 000000000..1aff3be91 --- /dev/null +++ b/kernel/arch/arc/include/asm/entry-compact.h @@ -0,0 +1,306 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Vineetg: March 2009 (Supporting 2 levels of Interrupts) + * Stack switching code can no longer reliably rely on the fact that + * if we are NOT in user mode, stack is switched to kernel mode. + * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed + * it's prologue including stack switching from user mode + * + * Vineetg: Aug 28th 2008: Bug #94984 + * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap + * Normally CPU does this automatically, however when doing FAKE rtie, + * we also need to explicitly do this. The problem in macros + * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit + * was being "CLEARED" rather then "SET". Actually "SET" clears ZOL context + * + * Vineetg: May 5th 2008 + * -Modified CALLEE_REG save/restore macros to handle the fact that + * r25 contains the kernel current task ptr + * - Defined Stack Switching Macro to be reused in all intr/excp hdlrs + * - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the + * address Write back load ld.ab instead of seperate ld/add instn + * + * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 + */ + +#ifndef __ASM_ARC_ENTRY_COMPACT_H +#define __ASM_ARC_ENTRY_COMPACT_H + +#include +#include +#include /* For THREAD_SIZE */ + +/*-------------------------------------------------------------- + * Switch to Kernel Mode stack if SP points to User Mode stack + * + * Entry : r9 contains pre-IRQ/exception/trap status32 + * Exit : SP set to K mode stack + * SP at the time of entry (K/U) saved @ pt_regs->sp + * Clobbers: r9 + *-------------------------------------------------------------*/ + +.macro SWITCH_TO_KERNEL_STK + + /* User Mode when this happened ? Yes: Proceed to switch stack */ + bbit1 r9, STATUS_U_BIT, 88f + + /* OK we were already in kernel mode when this event happened, thus can + * assume SP is kernel mode SP. _NO_ need to do any stack switching + */ + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS + /* However.... + * If Level 2 Interrupts enabled, we may end up with a corner case: + * 1. User Task executing + * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode) + * 3. But before it could switch SP from USER to KERNEL stack + * a L2 IRQ "Interrupts" L1 + * Thay way although L2 IRQ happened in Kernel mode, stack is still + * not switched. + * To handle this, we may need to switch stack even if in kernel mode + * provided SP has values in range of USER mode stack ( < 0x7000_0000 ) + */ + brlo sp, VMALLOC_START, 88f + + /* TODO: vineetg: + * We need to be a bit more cautious here. What if a kernel bug in + * L1 ISR, caused SP to go whaco (some small value which looks like + * USER stk) and then we take L2 ISR. + * Above brlo alone would treat it as a valid L1-L2 sceanrio + * instead of shouting alound + * The only feasible way is to make sure this L2 happened in + * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in + * L1 ISR before it switches stack + */ + +#endif + + /*------Intr/Ecxp happened in kernel mode, SP already setup ------ */ + /* save it nevertheless @ pt_regs->sp for uniformity */ + + b.d 66f + st sp, [sp, PT_sp - SZ_PT_REGS] + +88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */ + + GET_CURR_TASK_ON_CPU r9 + + /* With current tsk in r9, get it's kernel mode stack base */ + GET_TSK_STACK_BASE r9, r9 + + /* save U mode SP @ pt_regs->sp */ + st sp, [r9, PT_sp - SZ_PT_REGS] + + /* final SP switch */ + mov sp, r9 +66: +.endm + +/*------------------------------------------------------------ + * "FAKE" a rtie to return from CPU Exception context + * This is to re-enable Exceptions within exception + * Look at EV_ProtV to see how this is actually used + *-------------------------------------------------------------*/ + +.macro FAKE_RET_FROM_EXCPN + + lr r9, [status32] + bclr r9, r9, STATUS_AE_BIT + or r9, r9, (STATUS_E1_MASK|STATUS_E2_MASK) + sr r9, [erstatus] + mov r9, 55f + sr r9, [eret] + rtie +55: +.endm + +/*-------------------------------------------------------------- + * For early Exception/ISR Prologue, a core reg is temporarily needed to + * code the rest of prolog (stack switching). This is done by stashing + * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP). + * + * Before saving the full regfile - this reg is restored back, only + * to be saved again on kernel mode stack, as part of pt_regs. + *-------------------------------------------------------------*/ +.macro PROLOG_FREEUP_REG reg, mem +#ifdef CONFIG_SMP + sr \reg, [ARC_REG_SCRATCH_DATA0] +#else + st \reg, [\mem] +#endif +.endm + +.macro PROLOG_RESTORE_REG reg, mem +#ifdef CONFIG_SMP + lr \reg, [ARC_REG_SCRATCH_DATA0] +#else + ld \reg, [\mem] +#endif +.endm + +/*-------------------------------------------------------------- + * Exception Entry prologue + * -Switches stack to K mode (if not already) + * -Saves the register file + * + * After this it is safe to call the "C" handlers + *-------------------------------------------------------------*/ +.macro EXCEPTION_PROLOGUE + + /* Need at least 1 reg to code the early exception prologue */ + PROLOG_FREEUP_REG r9, @ex_saved_reg1 + + /* U/K mode at time of exception (stack not switched if already K) */ + lr r9, [erstatus] + + /* ARC700 doesn't provide auto-stack switching */ + SWITCH_TO_KERNEL_STK + +#ifdef CONFIG_ARC_CURR_IN_REG + /* Treat r25 as scratch reg (save on stack) and load with "current" */ + PUSH r25 + GET_CURR_TASK_ON_CPU r25 +#else + sub sp, sp, 4 +#endif + + st.a r0, [sp, -8] /* orig_r0 needed for syscall (skip ECR slot) */ + sub sp, sp, 4 /* skip pt_regs->sp, already saved above */ + + /* Restore r9 used to code the early prologue */ + PROLOG_RESTORE_REG r9, @ex_saved_reg1 + + /* now we are ready to save the regfile */ + SAVE_R0_TO_R12 + PUSH gp + PUSH fp + PUSH blink + PUSHAX eret + PUSHAX erstatus + PUSH lp_count + PUSHAX lp_end + PUSHAX lp_start + PUSHAX erbta + + lr r9, [ecr] + st r9, [sp, PT_event] /* EV_Trap expects r9 to have ECR */ +.endm + +/*-------------------------------------------------------------- + * Restore all registers used by system call or Exceptions + * SP should always be pointing to the next free stack element + * when entering this macro. + * + * NOTE: + * + * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg + * for memory load operations. If used in that way interrupts are deffered + * by hardware and that is not good. + *-------------------------------------------------------------*/ +.macro EXCEPTION_EPILOGUE + POPAX erbta + POPAX lp_start + POPAX lp_end + + POP r9 + mov lp_count, r9 ;LD to lp_count is not allowed + + POPAX erstatus + POPAX eret + POP blink + POP fp + POP gp + RESTORE_R12_TO_R0 + + ld sp, [sp] /* restore original sp */ + /* orig_r0, ECR, user_r25 skipped automatically */ +.endm + +/* Dummy ECR values for Interrupts */ +#define event_IRQ1 0x0031abcd +#define event_IRQ2 0x0032abcd + +.macro INTERRUPT_PROLOGUE LVL + + /* free up r9 as scratchpad */ + PROLOG_FREEUP_REG r9, @int\LVL\()_saved_reg + + /* Which mode (user/kernel) was the system in when intr occured */ + lr r9, [status32_l\LVL\()] + + SWITCH_TO_KERNEL_STK + +#ifdef CONFIG_ARC_CURR_IN_REG + /* Treat r25 as scratch reg (save on stack) and load with "current" */ + PUSH r25 + GET_CURR_TASK_ON_CPU r25 +#else + sub sp, sp, 4 +#endif + + PUSH 0x003\LVL\()abcd /* Dummy ECR */ + sub sp, sp, 8 /* skip orig_r0 (not needed) + skip pt_regs->sp, already saved above */ + + /* Restore r9 used to code the early prologue */ + PROLOG_RESTORE_REG r9, @int\LVL\()_saved_reg + + SAVE_R0_TO_R12 + PUSH gp + PUSH fp + PUSH blink + PUSH ilink\LVL\() + PUSHAX status32_l\LVL\() + PUSH lp_count + PUSHAX lp_end + PUSHAX lp_start + PUSHAX bta_l\LVL\() +.endm + +/*-------------------------------------------------------------- + * Restore all registers used by interrupt handlers. + * + * NOTE: + * + * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg + * for memory load operations. If used in that way interrupts are deffered + * by hardware and that is not good. + *-------------------------------------------------------------*/ +.macro INTERRUPT_EPILOGUE LVL + POPAX bta_l\LVL\() + POPAX lp_start + POPAX lp_end + + POP r9 + mov lp_count, r9 ;LD to lp_count is not allowed + + POPAX status32_l\LVL\() + POP ilink\LVL\() + POP blink + POP fp + POP gp + RESTORE_R12_TO_R0 + + ld sp, [sp] /* restore original sp */ + /* orig_r0, ECR, user_r25 skipped automatically */ +.endm + +/* Get thread_info of "current" tsk */ +.macro GET_CURR_THR_INFO_FROM_SP reg + bic \reg, sp, (THREAD_SIZE - 1) +.endm + +/* Get CPU-ID of this core */ +.macro GET_CPU_ID reg + lr \reg, [identity] + lsr \reg, \reg, 8 + bmsk \reg, \reg, 7 +.endm + +#endif /* __ASM_ARC_ENTRY_COMPACT_H */ diff --git a/kernel/arch/arc/include/asm/entry.h b/kernel/arch/arc/include/asm/entry.h index 884081099..ad7860c5c 100644 --- a/kernel/arch/arc/include/asm/entry.h +++ b/kernel/arch/arc/include/asm/entry.h @@ -1,45 +1,27 @@ /* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * Vineetg: March 2009 (Supporting 2 levels of Interrupts) - * Stack switching code can no longer reliably rely on the fact that - * if we are NOT in user mode, stack is switched to kernel mode. - * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed - * it's prologue including stack switching from user mode - * - * Vineetg: Aug 28th 2008: Bug #94984 - * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap - * Normally CPU does this automatically, however when doing FAKE rtie, - * we also need to explicitly do this. The problem in macros - * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit - * was being "CLEARED" rather then "SET". Actually "SET" clears ZOL context - * - * Vineetg: May 5th 2008 - * -Modified CALLEE_REG save/restore macros to handle the fact that - * r25 contains the kernel current task ptr - * - Defined Stack Switching Macro to be reused in all intr/excp hdlrs - * - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the - * address Write back load ld.ab instead of seperate ld/add instn - * - * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 */ #ifndef __ASM_ARC_ENTRY_H #define __ASM_ARC_ENTRY_H -#ifdef __ASSEMBLY__ #include /* For NR_syscalls defination */ -#include #include #include #include /* For VMALLOC_START */ -#include /* For THREAD_SIZE */ #include +#ifdef CONFIG_ISA_ARCOMPACT +#include /* ISA specific bits */ +#else +#include +#endif + /* Note on the LD/ST addr modes with addr reg wback * * LD.a same as LD.aw @@ -143,8 +125,6 @@ POP r13 .endm -#define OFF_USER_R25_FROM_R24 (SZ_CALLEE_REGS + SZ_PT_REGS - 8)/4 - /*-------------------------------------------------------------- * Collect User Mode callee regs as struct callee_regs - needed by * fork/do_signal/unaligned-access-emulation. @@ -157,12 +137,13 @@ *-------------------------------------------------------------*/ .macro SAVE_CALLEE_SAVED_USER + mov r12, sp ; save SP as ref to pt_regs SAVE_R13_TO_R24 #ifdef CONFIG_ARC_CURR_IN_REG - ; Retrieve orig r25 and save it on stack - ld.as r12, [sp, OFF_USER_R25_FROM_R24] - st.a r12, [sp, -4] + ; Retrieve orig r25 and save it with rest of callee_regs + ld.as r12, [r12, PT_user_r25] + PUSH r12 #else PUSH r25 #endif @@ -209,12 +190,16 @@ .macro RESTORE_CALLEE_SAVED_USER #ifdef CONFIG_ARC_CURR_IN_REG - ld.ab r12, [sp, 4] - st.as r12, [sp, OFF_USER_R25_FROM_R24] + POP r12 #else POP r25 #endif RESTORE_R24_TO_R13 + + ; SP is back to start of pt_regs +#ifdef CONFIG_ARC_CURR_IN_REG + st.as r12, [sp, PT_user_r25] +#endif .endm /*-------------------------------------------------------------- @@ -240,117 +225,6 @@ .endm -/*-------------------------------------------------------------- - * Switch to Kernel Mode stack if SP points to User Mode stack - * - * Entry : r9 contains pre-IRQ/exception/trap status32 - * Exit : SP is set to kernel mode stack pointer - * If CURR_IN_REG, r25 set to "current" task pointer - * Clobbers: r9 - *-------------------------------------------------------------*/ - -.macro SWITCH_TO_KERNEL_STK - - /* User Mode when this happened ? Yes: Proceed to switch stack */ - bbit1 r9, STATUS_U_BIT, 88f - - /* OK we were already in kernel mode when this event happened, thus can - * assume SP is kernel mode SP. _NO_ need to do any stack switching - */ - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS - /* However.... - * If Level 2 Interrupts enabled, we may end up with a corner case: - * 1. User Task executing - * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode) - * 3. But before it could switch SP from USER to KERNEL stack - * a L2 IRQ "Interrupts" L1 - * Thay way although L2 IRQ happened in Kernel mode, stack is still - * not switched. - * To handle this, we may need to switch stack even if in kernel mode - * provided SP has values in range of USER mode stack ( < 0x7000_0000 ) - */ - brlo sp, VMALLOC_START, 88f - - /* TODO: vineetg: - * We need to be a bit more cautious here. What if a kernel bug in - * L1 ISR, caused SP to go whaco (some small value which looks like - * USER stk) and then we take L2 ISR. - * Above brlo alone would treat it as a valid L1-L2 sceanrio - * instead of shouting alound - * The only feasible way is to make sure this L2 happened in - * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in - * L1 ISR before it switches stack - */ - -#endif - - /* Save Pre Intr/Exception KERNEL MODE SP on kernel stack - * safe-keeping not really needed, but it keeps the epilogue code - * (SP restore) simpler/uniform. - */ - b.d 66f - mov r9, sp - -88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */ - - GET_CURR_TASK_ON_CPU r9 - - /* With current tsk in r9, get it's kernel mode stack base */ - GET_TSK_STACK_BASE r9, r9 - -66: -#ifdef CONFIG_ARC_CURR_IN_REG - /* - * Treat r25 as scratch reg, save it on stack first - * Load it with current task pointer - */ - st r25, [r9, -4] - GET_CURR_TASK_ON_CPU r25 -#endif - - /* Save Pre Intr/Exception User SP on kernel stack */ - st.a sp, [r9, -16] ; Make room for orig_r0, ECR, user_r25 - - /* CAUTION: - * SP should be set at the very end when we are done with everything - * In case of 2 levels of interrupt we depend on value of SP to assume - * that everything else is done (loading r25 etc) - */ - - /* set SP to point to kernel mode stack */ - mov sp, r9 - - /* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */ - -.endm - -/*------------------------------------------------------------ - * "FAKE" a rtie to return from CPU Exception context - * This is to re-enable Exceptions within exception - * Look at EV_ProtV to see how this is actually used - *-------------------------------------------------------------*/ - -.macro FAKE_RET_FROM_EXCPN reg - - ld \reg, [sp, PT_status32] - bic \reg, \reg, (STATUS_U_MASK|STATUS_DE_MASK) - bset \reg, \reg, STATUS_L_BIT - sr \reg, [erstatus] - mov \reg, 55f - sr \reg, [eret] - - rtie -55: -.endm - -/* - * @reg [OUT] &thread_info of "current" - */ -.macro GET_CURR_THR_INFO_FROM_SP reg - bic \reg, sp, (THREAD_SIZE - 1) -.endm - /* * @reg [OUT] thread_info->flags of "current" */ @@ -359,222 +233,6 @@ ld \reg, [\reg, THREAD_INFO_FLAGS] .endm -/*-------------------------------------------------------------- - * For early Exception Prologue, a core reg is temporarily needed to - * code the rest of prolog (stack switching). This is done by stashing - * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP). - * - * Before saving the full regfile - this reg is restored back, only - * to be saved again on kernel mode stack, as part of pt_regs. - *-------------------------------------------------------------*/ -.macro EXCPN_PROLOG_FREEUP_REG reg -#ifdef CONFIG_SMP - sr \reg, [ARC_REG_SCRATCH_DATA0] -#else - st \reg, [@ex_saved_reg1] -#endif -.endm - -.macro EXCPN_PROLOG_RESTORE_REG reg -#ifdef CONFIG_SMP - lr \reg, [ARC_REG_SCRATCH_DATA0] -#else - ld \reg, [@ex_saved_reg1] -#endif -.endm - -/*-------------------------------------------------------------- - * Exception Entry prologue - * -Switches stack to K mode (if not already) - * -Saves the register file - * - * After this it is safe to call the "C" handlers - *-------------------------------------------------------------*/ -.macro EXCEPTION_PROLOGUE - - /* Need at least 1 reg to code the early exception prologue */ - EXCPN_PROLOG_FREEUP_REG r9 - - /* U/K mode at time of exception (stack not switched if already K) */ - lr r9, [erstatus] - - /* ARC700 doesn't provide auto-stack switching */ - SWITCH_TO_KERNEL_STK - - /* save the regfile */ - SAVE_ALL_SYS -.endm - -/*-------------------------------------------------------------- - * Save all registers used by Exceptions (TLB Miss, Prot-V, Mem err etc) - * Requires SP to be already switched to kernel mode Stack - * sp points to the next free element on the stack at exit of this macro. - * Registers are pushed / popped in the order defined in struct ptregs - * in asm/ptrace.h - * Note that syscalls are implemented via TRAP which is also a exception - * from CPU's point of view - *-------------------------------------------------------------*/ -.macro SAVE_ALL_SYS - - lr r9, [ecr] - st r9, [sp, 8] /* ECR */ - st r0, [sp, 4] /* orig_r0, needed only for sys calls */ - - /* Restore r9 used to code the early prologue */ - EXCPN_PROLOG_RESTORE_REG r9 - - SAVE_R0_TO_R12 - PUSH gp - PUSH fp - PUSH blink - PUSHAX eret - PUSHAX erstatus - PUSH lp_count - PUSHAX lp_end - PUSHAX lp_start - PUSHAX erbta -.endm - -/*-------------------------------------------------------------- - * Restore all registers used by system call or Exceptions - * SP should always be pointing to the next free stack element - * when entering this macro. - * - * NOTE: - * - * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg - * for memory load operations. If used in that way interrupts are deffered - * by hardware and that is not good. - *-------------------------------------------------------------*/ -.macro RESTORE_ALL_SYS - POPAX erbta - POPAX lp_start - POPAX lp_end - - POP r9 - mov lp_count, r9 ;LD to lp_count is not allowed - - POPAX erstatus - POPAX eret - POP blink - POP fp - POP gp - RESTORE_R12_TO_R0 - - ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ -.endm - - -/*-------------------------------------------------------------- - * Save all registers used by interrupt handlers. - *-------------------------------------------------------------*/ -.macro SAVE_ALL_INT1 - - /* restore original r9 to be saved as part of reg-file */ -#ifdef CONFIG_SMP - lr r9, [ARC_REG_SCRATCH_DATA0] -#else - ld r9, [@int1_saved_reg] -#endif - - /* now we are ready to save the remaining context :) */ - st event_IRQ1, [sp, 8] /* Dummy ECR */ - st 0, [sp, 4] /* orig_r0 , N/A for IRQ */ - - SAVE_R0_TO_R12 - PUSH gp - PUSH fp - PUSH blink - PUSH ilink1 - PUSHAX status32_l1 - PUSH lp_count - PUSHAX lp_end - PUSHAX lp_start - PUSHAX bta_l1 -.endm - -.macro SAVE_ALL_INT2 - - /* TODO-vineetg: SMP we can't use global nor can we use - * SCRATCH0 as we do for int1 because while int1 is using - * it, int2 can come - */ - /* retsore original r9 , saved in sys_saved_r9 */ - ld r9, [@int2_saved_reg] - - /* now we are ready to save the remaining context :) */ - st event_IRQ2, [sp, 8] /* Dummy ECR */ - st 0, [sp, 4] /* orig_r0 , N/A for IRQ */ - - SAVE_R0_TO_R12 - PUSH gp - PUSH fp - PUSH blink - PUSH ilink2 - PUSHAX status32_l2 - PUSH lp_count - PUSHAX lp_end - PUSHAX lp_start - PUSHAX bta_l2 -.endm - -/*-------------------------------------------------------------- - * Restore all registers used by interrupt handlers. - * - * NOTE: - * - * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg - * for memory load operations. If used in that way interrupts are deffered - * by hardware and that is not good. - *-------------------------------------------------------------*/ - -.macro RESTORE_ALL_INT1 - POPAX bta_l1 - POPAX lp_start - POPAX lp_end - - POP r9 - mov lp_count, r9 ;LD to lp_count is not allowed - - POPAX status32_l1 - POP ilink1 - POP blink - POP fp - POP gp - RESTORE_R12_TO_R0 - - ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ -.endm - -.macro RESTORE_ALL_INT2 - POPAX bta_l2 - POPAX lp_start - POPAX lp_end - - POP r9 - mov lp_count, r9 ;LD to lp_count is not allowed - - POPAX status32_l2 - POP ilink2 - POP blink - POP fp - POP gp - RESTORE_R12_TO_R0 - - ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ -.endm - - -/* Get CPU-ID of this core */ -.macro GET_CPU_ID reg - lr \reg, [identity] - lsr \reg, \reg, 8 - bmsk \reg, \reg, 7 -.endm - #ifdef CONFIG_SMP /*------------------------------------------------- @@ -643,6 +301,4 @@ #endif /* CONFIG_ARC_CURR_IN_REG */ -#endif /* __ASSEMBLY__ */ - #endif /* __ASM_ARC_ENTRY_H */ diff --git a/kernel/arch/arc/include/asm/futex.h b/kernel/arch/arc/include/asm/futex.h index 05b5aaf5b..11e1b1f3a 100644 --- a/kernel/arch/arc/include/asm/futex.h +++ b/kernel/arch/arc/include/asm/futex.h @@ -16,18 +16,22 @@ #include #include +#ifdef CONFIG_ARC_HAS_LLSC + #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\ \ + smp_mb(); \ __asm__ __volatile__( \ - "1: ld %1, [%2] \n" \ + "1: llock %1, [%2] \n" \ insn "\n" \ - "2: st %0, [%2] \n" \ + "2: scond %0, [%2] \n" \ + " bnz 1b \n" \ " mov %0, 0 \n" \ "3: \n" \ " .section .fixup,\"ax\" \n" \ " .align 4 \n" \ "4: mov %0, %4 \n" \ - " b 3b \n" \ + " j 3b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ " .align 4 \n" \ @@ -37,7 +41,37 @@ \ : "=&r" (ret), "=&r" (oldval) \ : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \ - : "cc", "memory") + : "cc", "memory"); \ + smp_mb() \ + +#else /* !CONFIG_ARC_HAS_LLSC */ + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\ + \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: ld %1, [%2] \n" \ + insn "\n" \ + "2: st %0, [%2] \n" \ + " mov %0, 0 \n" \ + "3: \n" \ + " .section .fixup,\"ax\" \n" \ + " .align 4 \n" \ + "4: mov %0, %4 \n" \ + " j 3b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " .align 4 \n" \ + " .word 1b, 4b \n" \ + " .word 2b, 4b \n" \ + " .previous \n" \ + \ + : "=&r" (ret), "=&r" (oldval) \ + : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \ + : "cc", "memory"); \ + smp_mb() \ + +#endif static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { @@ -53,6 +87,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; +#ifndef CONFIG_ARC_HAS_LLSC + preempt_disable(); /* to guarantee atomic r-m-w of futex op */ +#endif pagefault_disable(); switch (op) { @@ -60,6 +97,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) __futex_atomic_op("mov %0, %3", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ADD: + /* oldval = *uaddr; *uaddr += oparg ; ret = *uaddr */ __futex_atomic_op("add %0, %1, %3", ret, oldval, uaddr, oparg); break; case FUTEX_OP_OR: @@ -76,6 +114,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) } pagefault_enable(); +#ifndef CONFIG_ARC_HAS_LLSC + preempt_enable(); +#endif if (!ret) { switch (cmp) { @@ -104,48 +145,57 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) return ret; } -/* Compare-xchg with pagefaults disabled. - * Notes: - * -Best-Effort: Exchg happens only if compare succeeds. - * If compare fails, returns; leaving retry/looping to upper layers - * -successful cmp-xchg: return orig value in @addr (same as cmp val) - * -Compare fails: return orig value in @addr - * -user access r/w fails: return -EFAULT +/* + * cmpxchg of futex (pagefaults disabled by caller) + * Return 0 for success, -EFAULT otherwise */ static inline int -futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, - u32 newval) +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 expval, + u32 newval) { - u32 val; + int ret = 0; + u32 existval; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - pagefault_disable(); +#ifndef CONFIG_ARC_HAS_LLSC + preempt_disable(); /* to guarantee atomic r-m-w of futex op */ +#endif + smp_mb(); - /* TBD : can use llock/scond */ __asm__ __volatile__( - "1: ld %0, [%3] \n" - " brne %0, %1, 3f \n" - "2: st %2, [%3] \n" +#ifdef CONFIG_ARC_HAS_LLSC + "1: llock %1, [%4] \n" + " brne %1, %2, 3f \n" + "2: scond %3, [%4] \n" + " bnz 1b \n" +#else + "1: ld %1, [%4] \n" + " brne %1, %2, 3f \n" + "2: st %3, [%4] \n" +#endif "3: \n" " .section .fixup,\"ax\" \n" - "4: mov %0, %4 \n" - " b 3b \n" + "4: mov %0, %5 \n" + " j 3b \n" " .previous \n" " .section __ex_table,\"a\" \n" " .align 4 \n" " .word 1b, 4b \n" " .word 2b, 4b \n" " .previous\n" - : "=&r"(val) - : "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT) + : "+&r"(ret), "=&r"(existval) + : "r"(expval), "r"(newval), "r"(uaddr), "ir"(-EFAULT) : "cc", "memory"); - pagefault_enable(); + smp_mb(); - *uval = val; - return val; +#ifndef CONFIG_ARC_HAS_LLSC + preempt_enable(); +#endif + *uval = existval; + return ret; } #endif diff --git a/kernel/arch/arc/include/asm/highmem.h b/kernel/arch/arc/include/asm/highmem.h new file mode 100644 index 000000000..b1585c963 --- /dev/null +++ b/kernel/arch/arc/include/asm/highmem.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#ifdef CONFIG_HIGHMEM + +#include +#include + +/* start after vmalloc area */ +#define FIXMAP_BASE (PAGE_OFFSET - FIXMAP_SIZE - PKMAP_SIZE) +#define FIXMAP_SIZE PGDIR_SIZE /* only 1 PGD worth */ +#define KM_TYPE_NR ((FIXMAP_SIZE >> PAGE_SHIFT)/NR_CPUS) +#define FIXMAP_ADDR(nr) (FIXMAP_BASE + ((nr) << PAGE_SHIFT)) + +/* start after fixmap area */ +#define PKMAP_BASE (FIXMAP_BASE + FIXMAP_SIZE) +#define PKMAP_SIZE PGDIR_SIZE +#define LAST_PKMAP (PKMAP_SIZE >> PAGE_SHIFT) +#define LAST_PKMAP_MASK (LAST_PKMAP - 1) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) +#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT) + +#define kmap_prot PAGE_KERNEL + + +#include + +extern void *kmap(struct page *page); +extern void *kmap_high(struct page *page); +extern void *kmap_atomic(struct page *page); +extern void __kunmap_atomic(void *kvaddr); +extern void kunmap_high(struct page *page); + +extern void kmap_init(void); + +static inline void flush_cache_kmaps(void) +{ + flush_cache_all(); +} + +static inline void kunmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} + + +#endif + +#endif diff --git a/kernel/arch/arc/include/asm/hugepage.h b/kernel/arch/arc/include/asm/hugepage.h new file mode 100644 index 000000000..c5094de86 --- /dev/null +++ b/kernel/arch/arc/include/asm/hugepage.h @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + +#ifndef _ASM_ARC_HUGEPAGE_H +#define _ASM_ARC_HUGEPAGE_H + +#include +#include + +static inline pte_t pmd_pte(pmd_t pmd) +{ + return __pte(pmd_val(pmd)); +} + +static inline pmd_t pte_pmd(pte_t pte) +{ + return __pmd(pte_val(pte)); +} + +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) +#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd))) +#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) + +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) +#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) +#define pmd_special(pmd) pte_special(pmd_pte(pmd)) + +#define mk_pmd(page, prot) pte_pmd(mk_pte(page, prot)) + +#define pmd_trans_huge(pmd) (pmd_val(pmd) & _PAGE_HW_SZ) +#define pmd_trans_splitting(pmd) (pmd_trans_huge(pmd) && pmd_special(pmd)) + +#define pfn_pmd(pfn, prot) (__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + /* + * open-coded pte_modify() with additional retaining of HW_SZ bit + * so that pmd_trans_huge() remains true for this PMD + */ + return __pmd((pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HW_SZ)) | pgprot_val(newprot)); +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} + +extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd); + +#define has_transparent_hugepage() 1 + +/* Generic variants assume pgtable_t is struct page *, hence need for these */ +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); + +#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE +extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); + +#endif diff --git a/kernel/arch/arc/include/asm/io.h b/kernel/arch/arc/include/asm/io.h index cabd518cb..694ece8a0 100644 --- a/kernel/arch/arc/include/asm/io.h +++ b/kernel/arch/arc/include/asm/io.h @@ -20,6 +20,7 @@ extern void iounmap(const void __iomem *addr); #define ioremap_nocache(phy, sz) ioremap(phy, sz) #define ioremap_wc(phy, sz) ioremap(phy, sz) +#define ioremap_wt(phy, sz) ioremap(phy, sz) /* Change struct page to physical address */ #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) @@ -98,9 +99,45 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) } -#define readb_relaxed readb -#define readw_relaxed readw -#define readl_relaxed readl +#ifdef CONFIG_ISA_ARCV2 +#include +#define __iormb() rmb() +#define __iowmb() wmb() +#else +#define __iormb() do { } while (0) +#define __iowmb() do { } while (0) +#endif + +/* + * MMIO can also get buffered/optimized in micro-arch, so barriers needed + * Based on ARM model for the typical use case + * + * + * + * or: + * + * + * + * http://lkml.kernel.org/r/20150622133656.GG1583@arm.com + */ +#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) +#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) +#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) + +#define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); }) +#define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); }) +#define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); }) + +/* + * Relaxed API for drivers which can handle any ordering themselves + */ +#define readb_relaxed(c) __raw_readb(c) +#define readw_relaxed(c) __raw_readw(c) +#define readl_relaxed(c) __raw_readl(c) + +#define writeb_relaxed(v,c) __raw_writeb(v,c) +#define writew_relaxed(v,c) __raw_writew(v,c) +#define writel_relaxed(v,c) __raw_writel(v,c) #include diff --git a/kernel/arch/arc/include/asm/irq.h b/kernel/arch/arc/include/asm/irq.h index f38652fb2..4fd7d62a6 100644 --- a/kernel/arch/arc/include/asm/irq.h +++ b/kernel/arch/arc/include/asm/irq.h @@ -13,8 +13,15 @@ #define NR_IRQS 128 /* allow some CPU external IRQ handling */ /* Platform Independent IRQs */ +#ifdef CONFIG_ISA_ARCOMPACT #define TIMER0_IRQ 3 #define TIMER1_IRQ 4 +#define IPI_IRQ (NR_CPU_IRQS-1) /* dummy to enable SMP build for up hardware */ +#else +#define TIMER0_IRQ 16 +#define TIMER1_IRQ 17 +#define IPI_IRQ 19 +#endif #include #include diff --git a/kernel/arch/arc/include/asm/irqflags-arcv2.h b/kernel/arch/arc/include/asm/irqflags-arcv2.h new file mode 100644 index 000000000..68b609234 --- /dev/null +++ b/kernel/arch/arc/include/asm/irqflags-arcv2.h @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_IRQFLAGS_ARCV2_H +#define __ASM_IRQFLAGS_ARCV2_H + +#include + +/* status32 Bits */ +#define STATUS_AD_BIT 19 /* Disable Align chk: core supports non-aligned */ +#define STATUS_IE_BIT 31 + +#define STATUS_AD_MASK (1< + +/* status32 Reg bits related to Interrupt Handling */ +#define STATUS_E1_BIT 1 /* Int 1 enable */ +#define STATUS_E2_BIT 2 /* Int 2 enable */ +#define STATUS_A1_BIT 3 /* Int 1 active */ +#define STATUS_A2_BIT 4 /* Int 2 active */ +#define STATUS_AE_BIT 5 /* Exception active */ + +#define STATUS_E1_MASK (1< - -/* status32 Reg bits related to Interrupt Handling */ -#define STATUS_E1_BIT 1 /* Int 1 enable */ -#define STATUS_E2_BIT 2 /* Int 2 enable */ -#define STATUS_A1_BIT 3 /* Int 1 active */ -#define STATUS_A2_BIT 4 /* Int 2 active */ - -#define STATUS_E1_MASK (1< #else - -.macro TRACE_ASM_IRQ_DISABLE -.endm - -.macro TRACE_ASM_IRQ_ENABLE -.endm - +#include #endif -.macro IRQ_DISABLE scratch - lr \scratch, [status32] - bic \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) - flag \scratch - TRACE_ASM_IRQ_DISABLE -.endm - -.macro IRQ_ENABLE scratch - lr \scratch, [status32] - or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) - flag \scratch - TRACE_ASM_IRQ_ENABLE -.endm - -#endif /* __ASSEMBLY__ */ - #endif diff --git a/kernel/arch/arc/include/asm/kmap_types.h b/kernel/arch/arc/include/asm/kmap_types.h new file mode 100644 index 000000000..f0d7f6ace --- /dev/null +++ b/kernel/arch/arc/include/asm/kmap_types.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +/* + * We primarily need to define KM_TYPE_NR here but that in turn + * is a function of PGDIR_SIZE etc. + * To avoid circular deps issue, put everything in asm/highmem.h + */ +#endif diff --git a/kernel/arch/arc/include/asm/mach_desc.h b/kernel/arch/arc/include/asm/mach_desc.h index e8993a2be..c28e6c347 100644 --- a/kernel/arch/arc/include/asm/mach_desc.h +++ b/kernel/arch/arc/include/asm/mach_desc.h @@ -23,11 +23,8 @@ * @dt_compat: Array of device tree 'compatible' strings * (XXX: although only 1st entry is looked at) * @init_early: Very early callback [called from setup_arch()] - * @init_irq: setup external IRQ controllers [called from init_IRQ()] - * @init_smp: for each CPU (e.g. setup IPI) + * @init_per_cpu: for each CPU as it is coming up (SMP as well as UP) * [(M):init_IRQ(), (o):start_kernel_secondary()] - * @init_time: platform specific clocksource/clockevent registration - * [called from time_init()] * @init_machine: arch initcall level callback (e.g. populate static * platform devices or parse Devicetree) * @init_late: Late initcall level callback @@ -36,13 +33,10 @@ struct machine_desc { const char *name; const char **dt_compat; - void (*init_early)(void); - void (*init_irq)(void); #ifdef CONFIG_SMP - void (*init_smp)(unsigned int); + void (*init_per_cpu)(unsigned int); #endif - void (*init_time)(void); void (*init_machine)(void); void (*init_late)(void); diff --git a/kernel/arch/arc/include/asm/mcip.h b/kernel/arch/arc/include/asm/mcip.h new file mode 100644 index 000000000..46f4e5351 --- /dev/null +++ b/kernel/arch/arc/include/asm/mcip.h @@ -0,0 +1,91 @@ +/* + * ARConnect IP Support (Multi core enabler: Cross core IPI, RTC ...) + * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_MCIP_H +#define __ASM_MCIP_H + +#ifdef CONFIG_ISA_ARCV2 + +#include + +#define ARC_REG_MCIP_BCR 0x0d0 +#define ARC_REG_MCIP_CMD 0x600 +#define ARC_REG_MCIP_WDATA 0x601 +#define ARC_REG_MCIP_READBACK 0x602 + +struct mcip_cmd { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:8, param:16, cmd:8; +#else + unsigned int cmd:8, param:16, pad:8; +#endif + +#define CMD_INTRPT_GENERATE_IRQ 0x01 +#define CMD_INTRPT_GENERATE_ACK 0x02 +#define CMD_INTRPT_READ_STATUS 0x03 +#define CMD_INTRPT_CHECK_SOURCE 0x04 + +/* Semaphore Commands */ +#define CMD_SEMA_CLAIM_AND_READ 0x11 +#define CMD_SEMA_RELEASE 0x12 + +#define CMD_DEBUG_SET_MASK 0x34 +#define CMD_DEBUG_SET_SELECT 0x36 + +#define CMD_GRTC_READ_LO 0x42 +#define CMD_GRTC_READ_HI 0x43 + +#define CMD_IDU_ENABLE 0x71 +#define CMD_IDU_DISABLE 0x72 +#define CMD_IDU_SET_MODE 0x74 +#define CMD_IDU_SET_DEST 0x76 +#define CMD_IDU_SET_MASK 0x7C + +#define IDU_M_TRIG_LEVEL 0x0 +#define IDU_M_TRIG_EDGE 0x1 + +#define IDU_M_DISTRI_RR 0x0 +#define IDU_M_DISTRI_DEST 0x2 +}; + +/* + * MCIP programming model + * + * - Simple commands write {cmd:8,param:16} to MCIP_CMD aux reg + * (param could be irq, common_irq, core_id ...) + * - More involved commands setup MCIP_WDATA with cmd specific data + * before invoking the simple command + */ +static inline void __mcip_cmd(unsigned int cmd, unsigned int param) +{ + struct mcip_cmd buf; + + buf.pad = 0; + buf.cmd = cmd; + buf.param = param; + + WRITE_AUX(ARC_REG_MCIP_CMD, buf); +} + +/* + * Setup additional data for a cmd + * Callers need to lock to ensure atomicity + */ +static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param, + unsigned int data) +{ + write_aux_reg(ARC_REG_MCIP_WDATA, data); + + __mcip_cmd(cmd, param); +} + +#endif + +#endif diff --git a/kernel/arch/arc/include/asm/mmu.h b/kernel/arch/arc/include/asm/mmu.h index 8c84ae98c..b144d7ca7 100644 --- a/kernel/arch/arc/include/asm/mmu.h +++ b/kernel/arch/arc/include/asm/mmu.h @@ -15,24 +15,43 @@ #define CONFIG_ARC_MMU_VER 2 #elif defined(CONFIG_ARC_MMU_V3) #define CONFIG_ARC_MMU_VER 3 +#elif defined(CONFIG_ARC_MMU_V4) +#define CONFIG_ARC_MMU_VER 4 #endif /* MMU Management regs */ #define ARC_REG_MMU_BCR 0x06f +#if (CONFIG_ARC_MMU_VER < 4) #define ARC_REG_TLBPD0 0x405 #define ARC_REG_TLBPD1 0x406 +#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */ #define ARC_REG_TLBINDEX 0x407 #define ARC_REG_TLBCOMMAND 0x408 #define ARC_REG_PID 0x409 #define ARC_REG_SCRATCH_DATA0 0x418 +#else +#define ARC_REG_TLBPD0 0x460 +#define ARC_REG_TLBPD1 0x461 +#define ARC_REG_TLBPD1HI 0x463 +#define ARC_REG_TLBINDEX 0x464 +#define ARC_REG_TLBCOMMAND 0x465 +#define ARC_REG_PID 0x468 +#define ARC_REG_SCRATCH_DATA0 0x46c +#endif /* Bits in MMU PID register */ -#define MMU_ENABLE (1 << 31) /* Enable MMU for process */ +#define __TLB_ENABLE (1 << 31) +#define __PROG_ENABLE (1 << 30) +#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE) /* Error code if probe fails */ #define TLB_LKUP_ERR 0x80000000 +#if (CONFIG_ARC_MMU_VER < 4) #define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) +#else +#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) +#endif /* TLB Commands */ #define TLBWrite 0x1 @@ -45,6 +64,11 @@ #define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ #endif +#if (CONFIG_ARC_MMU_VER >= 4) +#define TLBInsertEntry 0x7 +#define TLBDeleteEntry 0x8 +#endif + #ifndef __ASSEMBLY__ typedef struct { @@ -61,6 +85,11 @@ void arc_mmu_init(void); extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); void read_decode_mmu_bcr(void); +static inline int is_pae40_enabled(void) +{ + return IS_ENABLED(CONFIG_ARC_HAS_PAE40); +} + #endif /* !__ASSEMBLY__ */ #endif diff --git a/kernel/arch/arc/include/asm/page.h b/kernel/arch/arc/include/asm/page.h index 9c8aa41e4..429957f1c 100644 --- a/kernel/arch/arc/include/asm/page.h +++ b/kernel/arch/arc/include/asm/page.h @@ -43,7 +43,6 @@ typedef struct { typedef struct { unsigned long pgprot; } pgprot_t; -typedef unsigned long pgtable_t; #define pte_val(x) ((x).pte) #define pgd_val(x) ((x).pgd) @@ -57,20 +56,26 @@ typedef unsigned long pgtable_t; #else /* !STRICT_MM_TYPECHECKS */ +#ifdef CONFIG_ARC_HAS_PAE40 +typedef unsigned long long pte_t; +#else typedef unsigned long pte_t; +#endif typedef unsigned long pgd_t; typedef unsigned long pgprot_t; -typedef unsigned long pgtable_t; #define pte_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) +#define __pgd(x) (x) #define __pgprot(x) (x) #define pte_pgprot(x) (x) #endif +typedef pte_t * pgtable_t; + #define ARCH_PFN_OFFSET (CONFIG_LINUX_LINK_BASE >> PAGE_SHIFT) #define pfn_valid(pfn) (((pfn) - ARCH_PFN_OFFSET) < max_mapnr) diff --git a/kernel/arch/arc/include/asm/perf_event.h b/kernel/arch/arc/include/asm/perf_event.h index 2b8880e95..5f071762f 100644 --- a/kernel/arch/arc/include/asm/perf_event.h +++ b/kernel/arch/arc/include/asm/perf_event.h @@ -1,6 +1,7 @@ /* * Linux performance counter support for ARC * + * Copyright (C) 2014-2015 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com) * * This program is free software; you can redistribute it and/or modify @@ -12,8 +13,8 @@ #ifndef __ASM_PERF_EVENT_H #define __ASM_PERF_EVENT_H -/* real maximum varies per CPU, this is the maximum supported by the driver */ -#define ARC_PMU_MAX_HWEVENTS 64 +/* Max number of counters that PCT block may ever have */ +#define ARC_PERF_MAX_COUNTERS 32 #define ARC_REG_CC_BUILD 0xF6 #define ARC_REG_CC_INDEX 0x240 @@ -28,15 +29,22 @@ #define ARC_REG_PCT_CONFIG 0x254 #define ARC_REG_PCT_CONTROL 0x255 #define ARC_REG_PCT_INDEX 0x256 +#define ARC_REG_PCT_INT_CNTL 0x25C +#define ARC_REG_PCT_INT_CNTH 0x25D +#define ARC_REG_PCT_INT_CTRL 0x25E +#define ARC_REG_PCT_INT_ACT 0x25F + +#define ARC_REG_PCT_CONFIG_USER (1 << 18) /* count in user mode */ +#define ARC_REG_PCT_CONFIG_KERN (1 << 19) /* count in kernel mode */ #define ARC_REG_PCT_CONTROL_CC (1 << 16) /* clear counts */ #define ARC_REG_PCT_CONTROL_SN (1 << 17) /* snapshot */ struct arc_reg_pct_build { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int m:8, c:8, r:6, s:2, v:8; + unsigned int m:8, c:8, r:5, i:1, s:2, v:8; #else - unsigned int v:8, s:2, r:6, c:8, m:8; + unsigned int v:8, s:2, i:1, r:5, c:8, m:8; #endif }; @@ -95,10 +103,13 @@ static const char * const arc_pmu_ev_hw_map[] = { /* counts condition */ [PERF_COUNT_HW_INSTRUCTIONS] = "iall", - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */ [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ +#ifdef CONFIG_ISA_ARCV2 + [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp", +#else [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */ - +#endif [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */ [PERF_COUNT_ARC_STC] = "imemwrc", /* Instr: mem write cached */ diff --git a/kernel/arch/arc/include/asm/pgalloc.h b/kernel/arch/arc/include/asm/pgalloc.h index 81208bfd9..86ed67128 100644 --- a/kernel/arch/arc/include/asm/pgalloc.h +++ b/kernel/arch/arc/include/asm/pgalloc.h @@ -49,7 +49,7 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep) static inline int __get_order_pgd(void) { - return get_order(PTRS_PER_PGD * 4); + return get_order(PTRS_PER_PGD * sizeof(pgd_t)); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) @@ -87,7 +87,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline int __get_order_pte(void) { - return get_order(PTRS_PER_PTE * 4); + return get_order(PTRS_PER_PTE * sizeof(pte_t)); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, @@ -107,10 +107,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) pgtable_t pte_pg; struct page *page; - pte_pg = __get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); + pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); if (!pte_pg) return 0; - memzero((void *)pte_pg, PTRS_PER_PTE * 4); + memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); page = virt_to_page(pte_pg); if (!pgtable_page_ctor(page)) { __free_page(page); @@ -128,12 +128,12 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline void pte_free(struct mm_struct *mm, pgtable_t ptep) { pgtable_page_dtor(virt_to_page(ptep)); - free_pages(ptep, __get_order_pte()); + free_pages((unsigned long)ptep, __get_order_pte()); } #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #define check_pgt_cache() do { } while (0) -#define pmd_pgtable(pmd) pmd_page_vaddr(pmd) +#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) #endif /* _ASM_ARC_PGALLOC_H */ diff --git a/kernel/arch/arc/include/asm/pgtable.h b/kernel/arch/arc/include/asm/pgtable.h index 9615fe170..57af2f05a 100644 --- a/kernel/arch/arc/include/asm/pgtable.h +++ b/kernel/arch/arc/include/asm/pgtable.h @@ -38,6 +38,7 @@ #include #include #include +#include /************************************************************************** * Page Table Flags @@ -60,7 +61,8 @@ #define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */ #define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */ #define _PAGE_READ (1<<5) /* Page has user read perm (H) */ -#define _PAGE_MODIFIED (1<<6) /* Page modified (dirty) (S) */ +#define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */ +#define _PAGE_SPECIAL (1<<7) #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */ @@ -71,32 +73,44 @@ #define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */ #define _PAGE_READ (1<<3) /* Page has user read perm (H) */ #define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ -#define _PAGE_MODIFIED (1<<5) /* Page modified (dirty) (S) */ +#define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */ +#define _PAGE_SPECIAL (1<<6) + +#if (CONFIG_ARC_MMU_VER >= 4) +#define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */ +#endif + #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ + +#if (CONFIG_ARC_MMU_VER >= 4) +#define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */ +#endif + #define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr usable for shared TLB entries (H) */ + +#define _PAGE_UNUSED_BIT (1<<12) #endif /* vmalloc permissions */ #define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \ _PAGE_GLOBAL | _PAGE_PRESENT) -#ifdef CONFIG_ARC_CACHE_PAGES -#define _PAGE_DEF_CACHEABLE _PAGE_CACHEABLE -#else -#define _PAGE_DEF_CACHEABLE (0) +#ifndef CONFIG_ARC_CACHE_PAGES +#undef _PAGE_CACHEABLE +#define _PAGE_CACHEABLE 0 #endif -/* Helper for every "user" page - * -kernel can R/W/X - * -by default cached, unless config otherwise - * -present in memory - */ -#define ___DEF (_PAGE_PRESENT | _PAGE_DEF_CACHEABLE) +#ifndef _PAGE_HW_SZ +#define _PAGE_HW_SZ 0 +#endif + +/* Defaults for every user page */ +#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) /* More Abbrevaited helpers */ #define PAGE_U_NONE __pgprot(___DEF) @@ -112,15 +126,20 @@ * user vaddr space - visible in all addr spaces, but kernel mode only * Thus Global, all-kernel-access, no-user-access, cached */ -#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_DEF_CACHEABLE) +#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE) /* ioremap */ #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS) /* Masks for actual TLB "PD"s */ -#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT) +#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) + +#ifdef CONFIG_ARC_HAS_PAE40 +#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) +#else #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) +#endif /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) @@ -181,26 +200,22 @@ /* Optimal Sizing of Pg Tbl - based on MMU page size */ #if defined(CONFIG_ARC_PAGE_SIZE_8K) -#define BITS_FOR_PTE 8 +#define BITS_FOR_PTE 8 /* 11:8:13 */ #elif defined(CONFIG_ARC_PAGE_SIZE_16K) -#define BITS_FOR_PTE 8 +#define BITS_FOR_PTE 8 /* 10:8:14 */ #elif defined(CONFIG_ARC_PAGE_SIZE_4K) -#define BITS_FOR_PTE 9 +#define BITS_FOR_PTE 9 /* 11:9:12 */ #endif #define BITS_FOR_PGD (32 - BITS_FOR_PTE - BITS_IN_PAGE) -#define PGDIR_SHIFT (BITS_FOR_PTE + BITS_IN_PAGE) +#define PGDIR_SHIFT (32 - BITS_FOR_PGD) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) /* vaddr span, not PDG sz */ #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#ifdef __ASSEMBLY__ -#define PTRS_PER_PTE (1 << BITS_FOR_PTE) -#define PTRS_PER_PGD (1 << BITS_FOR_PGD) -#else -#define PTRS_PER_PTE (1UL << BITS_FOR_PTE) -#define PTRS_PER_PGD (1UL << BITS_FOR_PGD) -#endif +#define PTRS_PER_PTE _BITUL(BITS_FOR_PTE) +#define PTRS_PER_PGD _BITUL(BITS_FOR_PGD) + /* * Number of entries a user land program use. * TASK_SIZE is the maximum vaddr that can be used by a userland program. @@ -260,15 +275,10 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) (unsigned long)(((pte_val(x) - CONFIG_LINUX_LINK_BASE) >> \ PAGE_SHIFT))) -#define mk_pte(page, pgprot) \ -({ \ - pte_t pte; \ - pte_val(pte) = __pa(page_address(page)) + pgprot_val(pgprot); \ - pte; \ -}) - +#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) #define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) -#define pfn_pte(pfn, prot) (__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define pfn_pte(pfn, prot) (__pte(((pte_t)(pfn) << PAGE_SHIFT) | \ + pgprot_val(prot))) #define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) /* @@ -285,23 +295,26 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) /* Zoo of pte_xxx function */ #define pte_read(pte) (pte_val(pte) & _PAGE_READ) #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) -#define pte_dirty(pte) (pte_val(pte) & _PAGE_MODIFIED) +#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) #define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) -#define pte_special(pte) (0) +#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL) #define PTE_BIT_FUNC(fn, op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } +PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT)); PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); -PTE_BIT_FUNC(mkclean, &= ~(_PAGE_MODIFIED)); -PTE_BIT_FUNC(mkdirty, |= (_PAGE_MODIFIED)); +PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); +PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY)); PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE)); PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE)); +PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); +PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } +#define __HAVE_ARCH_PTE_SPECIAL static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { @@ -347,7 +360,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define pgd_offset_fast(mm, addr) pgd_offset(mm, addr) #endif -extern void paging_init(void); extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); @@ -373,6 +385,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, * remap a physical page `pfn' of size `size' with page protection `prot' * into virtual address `from' */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#include +#endif + #include /* to cope with aliasing VIPT cache */ diff --git a/kernel/arch/arc/include/asm/processor.h b/kernel/arch/arc/include/asm/processor.h index 52312cb5d..1d694c1ef 100644 --- a/kernel/arch/arc/include/asm/processor.h +++ b/kernel/arch/arc/include/asm/processor.h @@ -57,11 +57,7 @@ struct task_struct; * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise * get optimised away by gcc */ -#ifdef CONFIG_SMP #define cpu_relax() __asm__ __volatile__ ("" : : : "memory") -#else -#define cpu_relax() do { } while (0) -#endif #define cpu_relax_lowlatency() cpu_relax() @@ -77,7 +73,7 @@ struct task_struct; */ #define TSK_K_ESP(tsk) (tsk->thread.ksp) -#define TSK_K_REG(tsk, off) (*((unsigned int *)(TSK_K_ESP(tsk) + \ +#define TSK_K_REG(tsk, off) (*((unsigned long *)(TSK_K_ESP(tsk) + \ sizeof(struct callee_regs) + off))) #define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4) @@ -100,29 +96,31 @@ extern unsigned int get_wchan(struct task_struct *p); #endif /* !__ASSEMBLY__ */ -/* Kernels Virtual memory area. - * Unlike other architectures(MIPS, sh, cris ) ARC 700 does not have a - * "kernel translated" region (like KSEG2 in MIPS). So we use a upper part - * of the translated bottom 2GB for kernel virtual memory and protect - * these pages from user accesses by disabling Ru, Eu and Wu. +/* + * System Memory Map on ARC + * + * ---------------------------- (lower 2G, Translated) ------------------------- + * 0x0000_0000 0x5FFF_FFFF (user vaddr: TASK_SIZE) + * 0x6000_0000 0x6FFF_FFFF (reserved gutter between U/K) + * 0x7000_0000 0x7FFF_FFFF (kvaddr: vmalloc/modules/pkmap..) + * + * PAGE_OFFSET ---------------- (Upper 2G, Untranslated) ----------------------- + * 0x8000_0000 0xBFFF_FFFF (kernel direct mapped) + * 0xC000_0000 0xFFFF_FFFF (peripheral uncached space) + * ----------------------------------------------------------------------------- */ -#define VMALLOC_SIZE (0x10000000) /* 256M */ -#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) -#define VMALLOC_END (PAGE_OFFSET) +#define VMALLOC_START 0x70000000 -/* Most of the architectures seem to be keeping some kind of padding between - * userspace TASK_SIZE and PAGE_OFFSET. i.e TASK_SIZE != PAGE_OFFSET. +/* + * 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter + * See asm/highmem.h for details */ +#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START - PGDIR_SIZE * 4) +#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) + #define USER_KERNEL_GUTTER 0x10000000 -/* User address space: - * On ARC700, CPU allows the entire lower half of 32 bit address space to be - * translated. Thus potentially 2G (0:0x7FFF_FFFF) could be User vaddr space. - * However we steal 256M for kernel addr (0x7000_0000:0x7FFF_FFFF) and another - * 256M (0x6000_0000:0x6FFF_FFFF) is gutter between user/kernel spaces - * Thus total User vaddr space is (0:0x5FFF_FFFF) - */ -#define TASK_SIZE (PAGE_OFFSET - VMALLOC_SIZE - USER_KERNEL_GUTTER) +#define TASK_SIZE (VMALLOC_START - USER_KERNEL_GUTTER) #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX STACK_TOP diff --git a/kernel/arch/arc/include/asm/ptrace.h b/kernel/arch/arc/include/asm/ptrace.h index 2a58af7a2..69095da1f 100644 --- a/kernel/arch/arc/include/asm/ptrace.h +++ b/kernel/arch/arc/include/asm/ptrace.h @@ -16,23 +16,24 @@ /* THE pt_regs: Defines how regs are saved during entry into kernel */ +#ifdef CONFIG_ISA_ARCOMPACT struct pt_regs { /* Real registers */ - long bta; /* bta_l1, bta_l2, erbta */ + unsigned long bta; /* bta_l1, bta_l2, erbta */ - long lp_start, lp_end, lp_count; + unsigned long lp_start, lp_end, lp_count; - long status32; /* status32_l1, status32_l2, erstatus */ - long ret; /* ilink1, ilink2 or eret */ - long blink; - long fp; - long r26; /* gp */ + unsigned long status32; /* status32_l1, status32_l2, erstatus */ + unsigned long ret; /* ilink1, ilink2 or eret */ + unsigned long blink; + unsigned long fp; + unsigned long r26; /* gp */ - long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; + unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; - long sp; /* user/kernel sp depending on where we came from */ - long orig_r0; + unsigned long sp; /* User/Kernel depending on where we came from */ + unsigned long orig_r0; /* * To distinguish bet excp, syscall, irq @@ -54,16 +55,58 @@ struct pt_regs { unsigned long event; }; - long user_r25; + unsigned long user_r25; }; +#else + +struct pt_regs { + + unsigned long orig_r0; + + union { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned long state:8, ecr_vec:8, + ecr_cause:8, ecr_param:8; +#else + unsigned long ecr_param:8, ecr_cause:8, + ecr_vec:8, state:8; +#endif + }; + unsigned long event; + }; + + unsigned long bta; /* bta_l1, bta_l2, erbta */ + + unsigned long user_r25; + + unsigned long r26; /* gp */ + unsigned long fp; + unsigned long sp; /* user/kernel sp depending on where we came from */ + + unsigned long r12; + + /*------- Below list auto saved by h/w -----------*/ + unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; + + unsigned long blink; + unsigned long lp_end, lp_start, lp_count; + + unsigned long ei, ldi, jli; + + unsigned long ret; + unsigned long status32; +}; + +#endif /* Callee saved registers - need to be saved only when you are scheduled out */ struct callee_regs { - long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; + unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; -#define instruction_pointer(regs) (unsigned long)((regs)->ret) +#define instruction_pointer(regs) ((regs)->ret) #define profile_pc(regs) instruction_pointer(regs) /* return 1 if user mode or 0 if kernel mode */ @@ -99,7 +142,7 @@ struct callee_regs { static inline long regs_return_value(struct pt_regs *regs) { - return regs->r0; + return (long)regs->r0; } #endif /* !__ASSEMBLY__ */ diff --git a/kernel/arch/arc/include/asm/setup.h b/kernel/arch/arc/include/asm/setup.h index 6e3ef5ba4..307846691 100644 --- a/kernel/arch/arc/include/asm/setup.h +++ b/kernel/arch/arc/include/asm/setup.h @@ -33,4 +33,11 @@ extern int root_mountflags, end_mem; void setup_processor(void); void __init setup_arch_memory(void); +/* Helpers used in arc_*_mumbojumbo routines */ +#define IS_AVAIL1(v, s) ((v) ? s : "") +#define IS_DISABLED_RUN(v) ((v) ? "" : "(disabled) ") +#define IS_USED_RUN(v) ((v) ? "" : "(not used) ") +#define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) +#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) + #endif /* __ASMARC_SETUP_H */ diff --git a/kernel/arch/arc/include/asm/smp.h b/kernel/arch/arc/include/asm/smp.h index 3845b9e94..991380438 100644 --- a/kernel/arch/arc/include/asm/smp.h +++ b/kernel/arch/arc/include/asm/smp.h @@ -45,12 +45,19 @@ extern int smp_ipi_irq_setup(int cpu, int irq); * struct plat_smp_ops - SMP callbacks provided by platform to ARC SMP * * @info: SoC SMP specific info for /proc/cpuinfo etc + * @init_early_smp: A SMP specific h/w block can init itself + * Could be common across platforms so not covered by + * mach_desc->init_early() + * @init_per_cpu: Called for each core so SMP h/w block driver can do + * any needed setup per cpu (e.g. IPI request) * @cpu_kick: For Master to kickstart a cpu (optionally at a PC) * @ipi_send: To send IPI to a @cpu * @ips_clear: To clear IPI received at @irq */ struct plat_smp_ops { const char *info; + void (*init_early_smp)(void); + void (*init_per_cpu)(int cpu); void (*cpu_kick)(int cpu, unsigned long pc); void (*ipi_send)(int cpu); void (*ipi_clear)(int irq); diff --git a/kernel/arch/arc/include/asm/spinlock.h b/kernel/arch/arc/include/asm/spinlock.h index e1651df6a..db8c59d1e 100644 --- a/kernel/arch/arc/include/asm/spinlock.h +++ b/kernel/arch/arc/include/asm/spinlock.h @@ -18,9 +18,518 @@ #define arch_spin_unlock_wait(x) \ do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) +#ifdef CONFIG_ARC_HAS_LLSC + +/* + * A normal LLOCK/SCOND based system, w/o need for livelock workaround + */ +#ifndef CONFIG_ARC_STAR_9000923308 + static inline void arch_spin_lock(arch_spinlock_t *lock) { - unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + unsigned int val; + + smp_mb(); + + __asm__ __volatile__( + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned int val, got_it = 0; + + smp_mb(); + + __asm__ __volatile__( + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bnz 1b \n" + " mov %[got_it], 1 \n" + "4: \n" + " \n" + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + smp_mb(); + + lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; + + smp_mb(); +} + +/* + * Read-write spinlocks, allowing multiple readers but only one writer. + * Unfair locking as Writers could be starved indefinitely by Reader(s) + */ + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * zero means writer holds the lock exclusively, deny Reader. + * Otherwise grant lock to first/subseq reader + * + * if (rw->counter > 0) { + * rw->counter--; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ + " sub %[val], %[val], 1 \n" /* reader lock */ + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + + smp_mb(); + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */ + " sub %[val], %[val], 1 \n" /* counter-- */ + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" /* retry if collided with someone */ + " mov %[got_it], 1 \n" + " \n" + "4: ; --- done --- \n" + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), + * deny writer. Otherwise if unlocked grant to writer + * Hence the claim that Linux rwlocks are unfair to writers. + * (can be starved for an indefinite time by readers). + * + * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) { + * rw->counter = 0; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + + smp_mb(); + + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" /* retry if collided with someone */ + " mov %[got_it], 1 \n" + " \n" + "4: ; --- done --- \n" + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * rw->counter++; + */ + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " add %[val], %[val], 1 \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)) + : "memory", "cc"); + + smp_mb(); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + smp_mb(); + + rw->counter = __ARCH_RW_LOCK_UNLOCKED__; + + smp_mb(); +} + +#else /* CONFIG_ARC_STAR_9000923308 */ + +/* + * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping + * coherency transactions in the SCU. The exclusive line state keeps rotating + * among contenting cores leading to a never ending cycle. So break the cycle + * by deferring the retry of failed exclusive access (SCOND). The actual delay + * needed is function of number of contending cores as well as the unrelated + * coherency traffic from other cores. To keep the code simple, start off with + * small delay of 1 which would suffice most cases and in case of contention + * double the delay. Eventually the delay is sufficient such that the coherency + * pipeline is drained, thus a subsequent exclusive access would succeed. + */ + +#define SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int delay, tmp; \ + +#define SCOND_FAIL_RETRY_ASM \ + " ; --- scond fail delay --- \n" \ + " mov %[tmp], %[delay] \n" /* tmp = delay */ \ + "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ + " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ + " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " b 1b \n" /* start over */ \ + " \n" \ + "4: ; --- done --- \n" \ + +#define SCOND_FAIL_RETRY_VARS \ + ,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \ + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + unsigned int val; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 0b \n" /* spin while LOCKED */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bz 4f \n" /* done */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val) + SCOND_FAIL_RETRY_VARS + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned int val, got_it = 0; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[slock]] \n" + " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ + " scond %[LOCKED], [%[slock]] \n" /* acquire */ + " bz.d 4f \n" + " mov.z %[got_it], 1 \n" /* got it */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + SCOND_FAIL_RETRY_VARS + : [slock] "r" (&(lock->slock)), + [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + smp_mb(); + + lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; + + smp_mb(); +} + +/* + * Read-write spinlocks, allowing multiple readers but only one writer. + * Unfair locking as Writers could be starved indefinitely by Reader(s) + */ + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned int val; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + /* + * zero means writer holds the lock exclusively, deny Reader. + * Otherwise grant lock to first/subseq reader + * + * if (rw->counter > 0) { + * rw->counter--; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 0b\n" /* <= 0: spin while write locked */ + " sub %[val], %[val], 1 \n" /* reader lock */ + " scond %[val], [%[rwlock]] \n" + " bz 4f \n" /* done */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */ + " sub %[val], %[val], 1 \n" /* counter-- */ + " scond %[val], [%[rwlock]] \n" + " bz.d 4f \n" + " mov.z %[got_it], 1 \n" /* got it */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned int val; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + /* + * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), + * deny writer. Otherwise if unlocked grant to writer + * Hence the claim that Linux rwlocks are unfair to writers. + * (can be starved for an indefinite time by readers). + * + * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) { + * rw->counter = 0; + * ret = 1; + * } + */ + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 0b \n" /* while !UNLOCKED spin */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bz 4f \n" + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); +} + +/* 1 - lock taken successfully */ +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + unsigned int val, got_it = 0; + SCOND_FAIL_RETRY_VAR_DEF; + + smp_mb(); + + __asm__ __volatile__( + "0: mov %[delay], 1 \n" + "1: llock %[val], [%[rwlock]] \n" + " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */ + " mov %[val], %[WR_LOCKED] \n" + " scond %[val], [%[rwlock]] \n" + " bz.d 4f \n" + " mov.z %[got_it], 1 \n" /* got it */ + " \n" + SCOND_FAIL_RETRY_ASM + + : [val] "=&r" (val), + [got_it] "+&r" (got_it) + SCOND_FAIL_RETRY_VARS + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), + [WR_LOCKED] "ir" (0) + : "memory", "cc"); + + smp_mb(); + + return got_it; +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * rw->counter++; + */ + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " add %[val], %[val], 1 \n" + " scond %[val], [%[rwlock]] \n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)) + : "memory", "cc"); + + smp_mb(); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + unsigned int val; + + smp_mb(); + + /* + * rw->counter = __ARCH_RW_LOCK_UNLOCKED__; + */ + __asm__ __volatile__( + "1: llock %[val], [%[rwlock]] \n" + " scond %[UNLOCKED], [%[rwlock]]\n" + " bnz 1b \n" + " \n" + : [val] "=&r" (val) + : [rwlock] "r" (&(rw->counter)), + [UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__) + : "memory", "cc"); + + smp_mb(); +} + +#undef SCOND_FAIL_RETRY_VAR_DEF +#undef SCOND_FAIL_RETRY_ASM +#undef SCOND_FAIL_RETRY_VARS + +#endif /* CONFIG_ARC_STAR_9000923308 */ + +#else /* !CONFIG_ARC_HAS_LLSC */ + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + unsigned int val = __ARCH_SPIN_LOCK_LOCKED__; /* * This smp_mb() is technically superfluous, we only need the one @@ -33,7 +542,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "1: ex %0, [%1] \n" " breq %0, %2, 1b \n" - : "+&r" (tmp) + : "+&r" (val) : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) : "memory"); @@ -48,26 +557,27 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) smp_mb(); } +/* 1 - lock taken successfully */ static inline int arch_spin_trylock(arch_spinlock_t *lock) { - unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + unsigned int val = __ARCH_SPIN_LOCK_LOCKED__; smp_mb(); __asm__ __volatile__( "1: ex %0, [%1] \n" - : "+r" (tmp) + : "+r" (val) : "r"(&(lock->slock)) : "memory"); smp_mb(); - return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__); + return (val == __ARCH_SPIN_LOCK_UNLOCKED__); } static inline void arch_spin_unlock(arch_spinlock_t *lock) { - unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + unsigned int val = __ARCH_SPIN_LOCK_UNLOCKED__; /* * RELEASE barrier: given the instructions avail on ARCv2, full barrier @@ -77,7 +587,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) __asm__ __volatile__( " ex %0, [%1] \n" - : "+r" (tmp) + : "+r" (val) : "r"(&(lock->slock)) : "memory"); @@ -90,19 +600,12 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) /* * Read-write spinlocks, allowing multiple readers but only one writer. + * Unfair locking as Writers could be starved indefinitely by Reader(s) * * The spinlock itself is contained in @counter and access to it is * serialized with @lock_mutex. - * - * Unfair locking as Writers could be starved indefinitely by Reader(s) */ -/* Would read_trylock() succeed? */ -#define arch_read_can_lock(x) ((x)->counter > 0) - -/* Would write_trylock() succeed? */ -#define arch_write_can_lock(x) ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__) - /* 1 - lock taken successfully */ static inline int arch_read_trylock(arch_rwlock_t *rw) { @@ -173,6 +676,11 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) arch_spin_unlock(&(rw->lock_mutex)); } +#endif + +#define arch_read_can_lock(x) ((x)->counter > 0) +#define arch_write_can_lock(x) ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__) + #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) diff --git a/kernel/arch/arc/include/asm/spinlock_types.h b/kernel/arch/arc/include/asm/spinlock_types.h index 662627ced..4e1ef5f65 100644 --- a/kernel/arch/arc/include/asm/spinlock_types.h +++ b/kernel/arch/arc/include/asm/spinlock_types.h @@ -26,7 +26,9 @@ typedef struct { */ typedef struct { volatile unsigned int counter; +#ifndef CONFIG_ARC_HAS_LLSC arch_spinlock_t lock_mutex; +#endif } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED__ 0x01000000 diff --git a/kernel/arch/arc/include/asm/thread_info.h b/kernel/arch/arc/include/asm/thread_info.h index aca0d5a45..3af674556 100644 --- a/kernel/arch/arc/include/asm/thread_info.h +++ b/kernel/arch/arc/include/asm/thread_info.h @@ -25,6 +25,7 @@ #endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define THREAD_SHIFT (PAGE_SHIFT << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ diff --git a/kernel/arch/arc/include/asm/tlbflush.h b/kernel/arch/arc/include/asm/tlbflush.h index 71c7b2e4b..1fe9c8c80 100644 --- a/kernel/arch/arc/include/asm/tlbflush.h +++ b/kernel/arch/arc/include/asm/tlbflush.h @@ -17,6 +17,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page); void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); #ifndef CONFIG_SMP #define flush_tlb_range(vma, s, e) local_flush_tlb_range(vma, s, e) @@ -24,6 +26,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, #define flush_tlb_kernel_range(s, e) local_flush_tlb_kernel_range(s, e) #define flush_tlb_all() local_flush_tlb_all() #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#define flush_pmd_tlb_range(vma, s, e) local_flush_pmd_tlb_range(vma, s, e) #else extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -31,5 +34,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void flush_tlb_all(void); extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); + #endif /* CONFIG_SMP */ #endif diff --git a/kernel/arch/arc/include/asm/uaccess.h b/kernel/arch/arc/include/asm/uaccess.h index 30c9baffa..d1da6032b 100644 --- a/kernel/arch/arc/include/asm/uaccess.h +++ b/kernel/arch/arc/include/asm/uaccess.h @@ -659,31 +659,30 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n) static inline long __arc_strncpy_from_user(char *dst, const char __user *src, long count) { - long res = count; + long res = 0; char val; - unsigned int hw_count; if (count == 0) return 0; __asm__ __volatile__( - " lp 2f \n" + " lp 3f \n" "1: ldb.ab %3, [%2, 1] \n" - " breq.d %3, 0, 2f \n" + " breq.d %3, 0, 3f \n" " stb.ab %3, [%1, 1] \n" - "2: sub %0, %6, %4 \n" - "3: ;nop \n" + " add %0, %0, 1 # Num of NON NULL bytes copied \n" + "3: \n" " .section .fixup, \"ax\" \n" " .align 4 \n" - "4: mov %0, %5 \n" + "4: mov %0, %4 # sets @res as -EFAULT \n" " j 3b \n" " .previous \n" " .section __ex_table, \"a\" \n" " .align 4 \n" " .word 1b, 4b \n" " .previous \n" - : "=r"(res), "+r"(dst), "+r"(src), "=&r"(val), "=l"(hw_count) - : "g"(-EFAULT), "ir"(count), "4"(count) /* this "4" seeds lp_count */ + : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) + : "g"(-EFAULT), "l"(count) : "memory"); return res; diff --git a/kernel/arch/arc/include/asm/unwind.h b/kernel/arch/arc/include/asm/unwind.h index 7ca628b6e..c11a25bb8 100644 --- a/kernel/arch/arc/include/asm/unwind.h +++ b/kernel/arch/arc/include/asm/unwind.h @@ -112,7 +112,6 @@ struct unwind_frame_info { extern int arc_unwind(struct unwind_frame_info *frame); extern void arc_unwind_init(void); -extern void arc_unwind_setup(void); extern void *unwind_add_table(struct module *module, const void *table_start, unsigned long table_size); extern void unwind_remove_table(void *handle, int init_only); @@ -152,9 +151,6 @@ static inline void arc_unwind_init(void) { } -static inline void arc_unwind_setup(void) -{ -} #define unwind_add_table(a, b, c) #define unwind_remove_table(a, b) -- cgit 1.2.3-korg