From 52f993b8e89487ec9ee15a7fb4979e0f09a45b27 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Wed, 8 Mar 2017 23:13:28 -0800 Subject: Upgrade to 4.4.50-rt62 The current kernel is based on rt kernel v4.4.6-rt14. We will upgrade it to 4.4.50-rt62. The steps to achieve this are: a) Clone a git repo from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git b) Get the diff between these two changesets: git diff 640eca2901f3435e616157b11379d3223a44b391 705619beeea1b0b48219a683fd1a901a86fdaf5e where the two commits are: [yjiang5@jnakajim-build linux-stable-rt]$ git show --oneline --name-only 640eca2901f3435e616157b11379d3223a44b391 640eca2901f3 v4.4.6-rt14 localversion-rt [yjiang5@jnakajim-build linux-stable-rt]$ git show --oneline --name-only 705619beeea1b0b48219a683fd1a901a86fdaf5e 705619beeea1 Linux 4.4.50-rt62 localversion-rt c) One patch has been backported, thus revert the patch before applying. filterdiff -p1 -x scripts/package/Makefile ~/tmp/v4.4.6-rt14-4.4.50-rt62.diff |patch -p1 --dry-run Upstream status: backport Change-Id: I244d57a32f6066e5a5b9915f9fbf99e7bbca6e01 Signed-off-by: Yunhong Jiang --- kernel/arch/sparc/include/asm/head_64.h | 4 + kernel/arch/sparc/include/asm/mmu_64.h | 3 +- kernel/arch/sparc/include/asm/pgtable_64.h | 43 +++- kernel/arch/sparc/include/asm/tlbflush_64.h | 3 +- kernel/arch/sparc/include/asm/ttable.h | 8 +- kernel/arch/sparc/include/asm/uaccess_32.h | 4 +- kernel/arch/sparc/include/asm/uaccess_64.h | 66 +---- kernel/arch/sparc/kernel/Makefile | 1 + kernel/arch/sparc/kernel/cherrs.S | 14 +- kernel/arch/sparc/kernel/dtlb_prot.S | 4 +- kernel/arch/sparc/kernel/entry.S | 17 ++ kernel/arch/sparc/kernel/fpu_traps.S | 11 +- kernel/arch/sparc/kernel/head_64.S | 60 +---- kernel/arch/sparc/kernel/jump_label.c | 23 +- kernel/arch/sparc/kernel/ktlb.S | 12 + kernel/arch/sparc/kernel/misctrap.S | 12 +- kernel/arch/sparc/kernel/pci.c | 17 ++ kernel/arch/sparc/kernel/rtrap_64.S | 57 +---- kernel/arch/sparc/kernel/signal32.c | 46 ++-- 
kernel/arch/sparc/kernel/signal_32.c | 41 +-- kernel/arch/sparc/kernel/signal_64.c | 33 ++- kernel/arch/sparc/kernel/sigutil_32.c | 9 +- kernel/arch/sparc/kernel/sigutil_64.c | 10 +- kernel/arch/sparc/kernel/sparc_ksyms_64.c | 1 - kernel/arch/sparc/kernel/spiterrs.S | 18 +- kernel/arch/sparc/kernel/syscalls.S | 36 +++ kernel/arch/sparc/kernel/tsb.S | 12 +- kernel/arch/sparc/kernel/urtt_fill.S | 98 ++++++++ kernel/arch/sparc/kernel/utrap.S | 3 +- kernel/arch/sparc/kernel/vmlinux.lds.S | 4 + kernel/arch/sparc/kernel/winfixup.S | 3 +- kernel/arch/sparc/lib/GENcopy_from_user.S | 4 +- kernel/arch/sparc/lib/GENcopy_to_user.S | 4 +- kernel/arch/sparc/lib/GENmemcpy.S | 48 ++-- kernel/arch/sparc/lib/Makefile | 2 +- kernel/arch/sparc/lib/NG2copy_from_user.S | 8 +- kernel/arch/sparc/lib/NG2copy_to_user.S | 8 +- kernel/arch/sparc/lib/NG2memcpy.S | 228 +++++++++++------ kernel/arch/sparc/lib/NG4copy_from_user.S | 8 +- kernel/arch/sparc/lib/NG4copy_to_user.S | 8 +- kernel/arch/sparc/lib/NG4memcpy.S | 294 ++++++++++++++++------ kernel/arch/sparc/lib/NGcopy_from_user.S | 4 +- kernel/arch/sparc/lib/NGcopy_to_user.S | 4 +- kernel/arch/sparc/lib/NGmemcpy.S | 233 +++++++++++------ kernel/arch/sparc/lib/U1copy_from_user.S | 8 +- kernel/arch/sparc/lib/U1copy_to_user.S | 8 +- kernel/arch/sparc/lib/U1memcpy.S | 345 ++++++++++++++++--------- kernel/arch/sparc/lib/U3copy_from_user.S | 8 +- kernel/arch/sparc/lib/U3copy_to_user.S | 8 +- kernel/arch/sparc/lib/U3memcpy.S | 227 +++++++++++------ kernel/arch/sparc/lib/copy_in_user.S | 35 ++- kernel/arch/sparc/lib/user_fixup.c | 71 ------ kernel/arch/sparc/mm/fault_64.c | 6 +- kernel/arch/sparc/mm/hugetlbpage.c | 37 ++- kernel/arch/sparc/mm/init_64.c | 111 ++++++--- kernel/arch/sparc/mm/tlb.c | 29 ++- kernel/arch/sparc/mm/tsb.c | 63 +++-- kernel/arch/sparc/mm/ultra.S | 374 ++++++++++++++++++++++------ 58 files changed, 1882 insertions(+), 974 deletions(-) create mode 100644 kernel/arch/sparc/kernel/urtt_fill.S delete mode 100644 
kernel/arch/sparc/lib/user_fixup.c (limited to 'kernel/arch/sparc') diff --git a/kernel/arch/sparc/include/asm/head_64.h b/kernel/arch/sparc/include/asm/head_64.h index 10e9dabc4..f0700cfee 100644 --- a/kernel/arch/sparc/include/asm/head_64.h +++ b/kernel/arch/sparc/include/asm/head_64.h @@ -15,6 +15,10 @@ #define PTREGS_OFF (STACK_BIAS + STACKFRAME_SZ) +#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE) +#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV) +#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG) + #define __CHEETAH_ID 0x003e0014 #define __JALAPENO_ID 0x003e0016 #define __SERRANO_ID 0x003e0022 diff --git a/kernel/arch/sparc/include/asm/mmu_64.h b/kernel/arch/sparc/include/asm/mmu_64.h index 70067ce18..f7de0dbc3 100644 --- a/kernel/arch/sparc/include/asm/mmu_64.h +++ b/kernel/arch/sparc/include/asm/mmu_64.h @@ -92,7 +92,8 @@ struct tsb_config { typedef struct { spinlock_t lock; unsigned long sparc64_ctx_val; - unsigned long huge_pte_count; + unsigned long hugetlb_pte_count; + unsigned long thp_pte_count; struct tsb_config tsb_block[MM_NUM_TSBS]; struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; } mm_context_t; diff --git a/kernel/arch/sparc/include/asm/pgtable_64.h b/kernel/arch/sparc/include/asm/pgtable_64.h index 131d36fcd..408b715c9 100644 --- a/kernel/arch/sparc/include/asm/pgtable_64.h +++ b/kernel/arch/sparc/include/asm/pgtable_64.h @@ -375,7 +375,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) #define pgprot_noncached pgprot_noncached #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) -static inline pte_t pte_mkhuge(pte_t pte) +static inline unsigned long __pte_huge_mask(void) { unsigned long mask; @@ -390,8 +390,19 @@ static inline pte_t pte_mkhuge(pte_t pte) : "=r" (mask) : "i" (_PAGE_SZHUGE_4U), "i" (_PAGE_SZHUGE_4V)); - return __pte(pte_val(pte) | mask); + return mask; +} + +static inline pte_t pte_mkhuge(pte_t pte) +{ + return __pte(pte_val(pte) | 
__pte_huge_mask()); +} + +static inline bool is_hugetlb_pte(pte_t pte) +{ + return !!(pte_val(pte) & __pte_huge_mask()); } + #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline pmd_t pmd_mkhuge(pmd_t pmd) { @@ -403,6 +414,11 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) return __pmd(pte_val(pte)); } #endif +#else +static inline bool is_hugetlb_pte(pte_t pte) +{ + return false; +} #endif static inline pte_t pte_mkdirty(pte_t pte) @@ -865,6 +881,19 @@ static inline unsigned long pud_pfn(pud_t pud) void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig, int fullmm); +static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, + pte_t *ptep, pte_t orig, int fullmm) +{ + /* It is more efficient to let flush_tlb_kernel_range() + * handle init_mm tlb flushes. + * + * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U + * and SUN4V pte layout, so this inline test is fine. + */ + if (likely(mm != &init_mm) && pte_accessible(mm, orig)) + tlb_batch_add(mm, vaddr, ptep, orig, fullmm); +} + #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, @@ -881,15 +910,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t orig = *ptep; *ptep = pte; - - /* It is more efficient to let flush_tlb_kernel_range() - * handle init_mm tlb flushes. - * - * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U - * and SUN4V pte layout, so this inline test is fine. 
- */ - if (likely(mm != &init_mm) && pte_accessible(mm, orig)) - tlb_batch_add(mm, addr, ptep, orig, fullmm); + maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm); } #define set_pte_at(mm,addr,ptep,pte) \ diff --git a/kernel/arch/sparc/include/asm/tlbflush_64.h b/kernel/arch/sparc/include/asm/tlbflush_64.h index dea1cfa21..a8e192e90 100644 --- a/kernel/arch/sparc/include/asm/tlbflush_64.h +++ b/kernel/arch/sparc/include/asm/tlbflush_64.h @@ -8,6 +8,7 @@ #define TLB_BATCH_NR 192 struct tlb_batch { + bool huge; struct mm_struct *mm; unsigned long tlb_nr; unsigned long active; @@ -16,7 +17,7 @@ struct tlb_batch { void flush_tsb_kernel_range(unsigned long start, unsigned long end); void flush_tsb_user(struct tlb_batch *tb); -void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr); +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge); /* TLB flush operations. */ diff --git a/kernel/arch/sparc/include/asm/ttable.h b/kernel/arch/sparc/include/asm/ttable.h index 71b5a6752..781b9f1db 100644 --- a/kernel/arch/sparc/include/asm/ttable.h +++ b/kernel/arch/sparc/include/asm/ttable.h @@ -589,8 +589,8 @@ user_rtt_fill_64bit: \ restored; \ nop; nop; nop; nop; nop; nop; \ nop; nop; nop; nop; nop; \ - ba,a,pt %xcc, user_rtt_fill_fixup; \ - ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup_dax; \ + ba,a,pt %xcc, user_rtt_fill_fixup_mna; \ ba,a,pt %xcc, user_rtt_fill_fixup; @@ -652,8 +652,8 @@ user_rtt_fill_32bit: \ restored; \ nop; nop; nop; nop; nop; \ nop; nop; nop; \ - ba,a,pt %xcc, user_rtt_fill_fixup; \ - ba,a,pt %xcc, user_rtt_fill_fixup; \ + ba,a,pt %xcc, user_rtt_fill_fixup_dax; \ + ba,a,pt %xcc, user_rtt_fill_fixup_mna; \ ba,a,pt %xcc, user_rtt_fill_fixup; diff --git a/kernel/arch/sparc/include/asm/uaccess_32.h b/kernel/arch/sparc/include/asm/uaccess_32.h index 64ee103dc..dfb542c7c 100644 --- a/kernel/arch/sparc/include/asm/uaccess_32.h +++ b/kernel/arch/sparc/include/asm/uaccess_32.h @@ -328,8 +328,10 @@ static 
inline unsigned long copy_from_user(void *to, const void __user *from, un { if (n && __access_ok((unsigned long) from, n)) return __copy_user((__force void __user *) to, from, n); - else + else { + memset(to, 0, n); return n; + } } static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) diff --git a/kernel/arch/sparc/include/asm/uaccess_64.h b/kernel/arch/sparc/include/asm/uaccess_64.h index ea6e9a20f..f42851248 100644 --- a/kernel/arch/sparc/include/asm/uaccess_64.h +++ b/kernel/arch/sparc/include/asm/uaccess_64.h @@ -98,7 +98,6 @@ struct exception_table_entry { unsigned int insn, fixup; }; -void __ret_efault(void); void __retl_efault(void); /* Uh, these should become the main single-value transfer routines.. @@ -179,20 +178,6 @@ int __put_user_bad(void); __gu_ret; \ }) -#define __get_user_nocheck_ret(data, addr, size, type, retval) ({ \ - register unsigned long __gu_val __asm__ ("l1"); \ - switch (size) { \ - case 1: __get_user_asm_ret(__gu_val, ub, addr, retval); break; \ - case 2: __get_user_asm_ret(__gu_val, uh, addr, retval); break; \ - case 4: __get_user_asm_ret(__gu_val, uw, addr, retval); break; \ - case 8: __get_user_asm_ret(__gu_val, x, addr, retval); break; \ - default: \ - if (__get_user_bad()) \ - return retval; \ - } \ - data = (__force type) __gu_val; \ -}) - #define __get_user_asm(x, size, addr, ret) \ __asm__ __volatile__( \ "/* Get user asm, inline. */\n" \ @@ -214,80 +199,35 @@ __asm__ __volatile__( \ : "=r" (ret), "=r" (x) : "r" (__m(addr)), \ "i" (-EFAULT)) -#define __get_user_asm_ret(x, size, addr, retval) \ -if (__builtin_constant_p(retval) && retval == -EFAULT) \ - __asm__ __volatile__( \ - "/* Get user asm ret, inline. */\n" \ - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".word 1b,__ret_efault\n\n\t" \ - ".previous\n\t" \ - : "=r" (x) : "r" (__m(addr))); \ -else \ - __asm__ __volatile__( \ - "/* Get user asm ret, inline. 
*/\n" \ - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ - ".section .fixup,#alloc,#execinstr\n\t" \ - ".align 4\n" \ - "3:\n\t" \ - "ret\n\t" \ - " restore %%g0, %2, %%o0\n\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".word 1b, 3b\n\n\t" \ - ".previous\n\t" \ - : "=r" (x) : "r" (__m(addr)), "i" (retval)) - int __get_user_bad(void); unsigned long __must_check ___copy_from_user(void *to, const void __user *from, unsigned long size); -unsigned long copy_from_user_fixup(void *to, const void __user *from, - unsigned long size); static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long size) { - unsigned long ret = ___copy_from_user(to, from, size); - - if (unlikely(ret)) - ret = copy_from_user_fixup(to, from, size); - - return ret; + return ___copy_from_user(to, from, size); } #define __copy_from_user copy_from_user unsigned long __must_check ___copy_to_user(void __user *to, const void *from, unsigned long size); -unsigned long copy_to_user_fixup(void __user *to, const void *from, - unsigned long size); static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long size) { - unsigned long ret = ___copy_to_user(to, from, size); - - if (unlikely(ret)) - ret = copy_to_user_fixup(to, from, size); - return ret; + return ___copy_to_user(to, from, size); } #define __copy_to_user copy_to_user unsigned long __must_check ___copy_in_user(void __user *to, const void __user *from, unsigned long size); -unsigned long copy_in_user_fixup(void __user *to, void __user *from, - unsigned long size); static inline unsigned long __must_check copy_in_user(void __user *to, void __user *from, unsigned long size) { - unsigned long ret = ___copy_in_user(to, from, size); - - if (unlikely(ret)) - ret = copy_in_user_fixup(to, from, size); - return ret; + return ___copy_in_user(to, from, size); } #define __copy_in_user copy_in_user diff --git a/kernel/arch/sparc/kernel/Makefile 
b/kernel/arch/sparc/kernel/Makefile index 7cf9c6ea3..fdb13327f 100644 --- a/kernel/arch/sparc/kernel/Makefile +++ b/kernel/arch/sparc/kernel/Makefile @@ -21,6 +21,7 @@ CFLAGS_REMOVE_perf_event.o := -pg CFLAGS_REMOVE_pcr.o := -pg endif +obj-$(CONFIG_SPARC64) += urtt_fill.o obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o obj-$(CONFIG_SPARC32) += etrap_32.o obj-$(CONFIG_SPARC32) += rtrap_32.o diff --git a/kernel/arch/sparc/kernel/cherrs.S b/kernel/arch/sparc/kernel/cherrs.S index 4ee1ad420..655628def 100644 --- a/kernel/arch/sparc/kernel/cherrs.S +++ b/kernel/arch/sparc/kernel/cherrs.S @@ -214,8 +214,7 @@ do_dcpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */ subcc %g1, %g2, %g1 ! Next cacheline bge,pt %icc, 1b nop - ba,pt %xcc, dcpe_icpe_tl1_common - nop + ba,a,pt %xcc, dcpe_icpe_tl1_common do_dcpe_tl1_fatal: sethi %hi(1f), %g7 @@ -224,8 +223,7 @@ do_dcpe_tl1_fatal: mov 0x2, %o0 call cheetah_plus_parity_error add %sp, PTREGS_OFF, %o1 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_dcpe_tl1,.-do_dcpe_tl1 .globl do_icpe_tl1 @@ -259,8 +257,7 @@ do_icpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. 
*/ subcc %g1, %g2, %g1 bge,pt %icc, 1b nop - ba,pt %xcc, dcpe_icpe_tl1_common - nop + ba,a,pt %xcc, dcpe_icpe_tl1_common do_icpe_tl1_fatal: sethi %hi(1f), %g7 @@ -269,8 +266,7 @@ do_icpe_tl1_fatal: mov 0x3, %o0 call cheetah_plus_parity_error add %sp, PTREGS_OFF, %o1 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_icpe_tl1,.-do_icpe_tl1 .type dcpe_icpe_tl1_common,#function @@ -456,7 +452,7 @@ __cheetah_log_error: cmp %g2, 0x63 be c_cee nop - ba,pt %xcc, c_deferred + ba,a,pt %xcc, c_deferred .size __cheetah_log_error,.-__cheetah_log_error /* Cheetah FECC trap handling, we get here from tl{0,1}_fecc diff --git a/kernel/arch/sparc/kernel/dtlb_prot.S b/kernel/arch/sparc/kernel/dtlb_prot.S index d668ca149..4087a62f9 100644 --- a/kernel/arch/sparc/kernel/dtlb_prot.S +++ b/kernel/arch/sparc/kernel/dtlb_prot.S @@ -25,13 +25,13 @@ /* PROT ** ICACHE line 2: More real fault processing */ ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 + srlx %g5, PAGE_SHIFT, %g5 + sllx %g5, PAGE_SHIFT, %g5 ! Clear context ID bits bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault nop nop - nop - nop /* PROT ** ICACHE line 3: Unused... */ nop diff --git a/kernel/arch/sparc/kernel/entry.S b/kernel/arch/sparc/kernel/entry.S index 33c02b15f..a83707c83 100644 --- a/kernel/arch/sparc/kernel/entry.S +++ b/kernel/arch/sparc/kernel/entry.S @@ -948,7 +948,24 @@ linux_syscall_trace: cmp %o0, 0 bne 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. 
*/ + ld [%sp + STACKFRAME_SZ + PT_G1], %g1 + sethi %hi(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I0], %i0 + or %l7, %lo(sys_call_table), %l7 + ld [%sp + STACKFRAME_SZ + PT_I1], %i1 + ld [%sp + STACKFRAME_SZ + PT_I2], %i2 + ld [%sp + STACKFRAME_SZ + PT_I3], %i3 + ld [%sp + STACKFRAME_SZ + PT_I4], %i4 + ld [%sp + STACKFRAME_SZ + PT_I5], %i5 + cmp %g1, NR_syscalls + bgeu 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + ld [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 diff --git a/kernel/arch/sparc/kernel/fpu_traps.S b/kernel/arch/sparc/kernel/fpu_traps.S index a6864826a..336d2750f 100644 --- a/kernel/arch/sparc/kernel/fpu_traps.S +++ b/kernel/arch/sparc/kernel/fpu_traps.S @@ -100,8 +100,8 @@ do_fpdis: fmuld %f0, %f2, %f26 faddd %f0, %f2, %f28 fmuld %f0, %f2, %f30 - b,pt %xcc, fpdis_exit - nop + ba,a,pt %xcc, fpdis_exit + 2: andcc %g5, FPRS_DU, %g0 bne,pt %icc, 3f fzero %f32 @@ -144,8 +144,8 @@ do_fpdis: fmuld %f32, %f34, %f58 faddd %f32, %f34, %f60 fmuld %f32, %f34, %f62 - ba,pt %xcc, fpdis_exit - nop + ba,a,pt %xcc, fpdis_exit + 3: mov SECONDARY_CONTEXT, %g3 add %g6, TI_FPREGS, %g1 @@ -197,8 +197,7 @@ fpdis_exit2: fp_other_bounce: call do_fpother add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size fp_other_bounce,.-fp_other_bounce .align 32 diff --git a/kernel/arch/sparc/kernel/head_64.S b/kernel/arch/sparc/kernel/head_64.S index f2d30cab5..7eeeb1d5a 100644 --- a/kernel/arch/sparc/kernel/head_64.S +++ b/kernel/arch/sparc/kernel/head_64.S @@ -461,9 +461,8 @@ sun4v_chip_type: subcc %g3, 1, %g3 bne,pt %xcc, 41b add %g1, 1, %g1 - mov SUN4V_CHIP_SPARC64X, %g4 ba,pt %xcc, 5f - nop + mov SUN4V_CHIP_SPARC64X, %g4 49: mov SUN4V_CHIP_UNKNOWN, %g4 @@ -548,8 +547,7 @@ sun4u_init: stxa %g0, [%g7] ASI_DMMU membar #Sync - ba,pt %xcc, sun4u_continue - nop + ba,a,pt %xcc, sun4u_continue sun4v_init: /* Set ctx 0 */ @@ -560,14 +558,12 @@ sun4v_init: mov SECONDARY_CONTEXT, %g7 stxa %g0, [%g7] ASI_MMU membar #Sync - ba,pt %xcc, 
niagara_tlb_fixup - nop + ba,a,pt %xcc, niagara_tlb_fixup sun4u_continue: BRANCH_IF_ANY_CHEETAH(g1, g7, cheetah_tlb_fixup) - ba,pt %xcc, spitfire_tlb_fixup - nop + ba,a,pt %xcc, spitfire_tlb_fixup niagara_tlb_fixup: mov 3, %g2 /* Set TLB type to hypervisor. */ @@ -639,8 +635,7 @@ niagara_patch: call hypervisor_patch_cachetlbops nop - ba,pt %xcc, tlb_fixup_done - nop + ba,a,pt %xcc, tlb_fixup_done cheetah_tlb_fixup: mov 2, %g2 /* Set TLB type to cheetah+. */ @@ -659,8 +654,7 @@ cheetah_tlb_fixup: call cheetah_patch_cachetlbops nop - ba,pt %xcc, tlb_fixup_done - nop + ba,a,pt %xcc, tlb_fixup_done spitfire_tlb_fixup: /* Set TLB type to spitfire. */ @@ -782,8 +776,7 @@ setup_trap_table: call %o1 add %sp, (2047 + 128), %o0 - ba,pt %xcc, 2f - nop + ba,a,pt %xcc, 2f 1: sethi %hi(sparc64_ttable_tl0), %o0 set prom_set_trap_table_name, %g2 @@ -822,8 +815,7 @@ setup_trap_table: BRANCH_IF_ANY_CHEETAH(o2, o3, 1f) - ba,pt %xcc, 2f - nop + ba,a,pt %xcc, 2f /* Disable STICK_INT interrupts. */ 1: @@ -930,47 +922,11 @@ prom_tba: .xword 0 tlb_type: .word 0 /* Must NOT end up in BSS */ .section ".fixup",#alloc,#execinstr - .globl __ret_efault, __retl_efault, __ret_one, __retl_one -ENTRY(__ret_efault) - ret - restore %g0, -EFAULT, %o0 -ENDPROC(__ret_efault) - ENTRY(__retl_efault) retl mov -EFAULT, %o0 ENDPROC(__retl_efault) -ENTRY(__retl_one) - retl - mov 1, %o0 -ENDPROC(__retl_one) - -ENTRY(__retl_one_fp) - VISExitHalf - retl - mov 1, %o0 -ENDPROC(__retl_one_fp) - -ENTRY(__ret_one_asi) - wr %g0, ASI_AIUS, %asi - ret - restore %g0, 1, %o0 -ENDPROC(__ret_one_asi) - -ENTRY(__retl_one_asi) - wr %g0, ASI_AIUS, %asi - retl - mov 1, %o0 -ENDPROC(__retl_one_asi) - -ENTRY(__retl_one_asi_fp) - wr %g0, ASI_AIUS, %asi - VISExitHalf - retl - mov 1, %o0 -ENDPROC(__retl_one_asi_fp) - ENTRY(__retl_o1) retl mov %o1, %o0 diff --git a/kernel/arch/sparc/kernel/jump_label.c b/kernel/arch/sparc/kernel/jump_label.c index 59bbeff55..07933b9e9 100644 --- a/kernel/arch/sparc/kernel/jump_label.c +++ 
b/kernel/arch/sparc/kernel/jump_label.c @@ -13,19 +13,30 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - u32 val; u32 *insn = (u32 *) (unsigned long) entry->code; + u32 val; if (type == JUMP_LABEL_JMP) { s32 off = (s32)entry->target - (s32)entry->code; + bool use_v9_branch = false; + + BUG_ON(off & 3); #ifdef CONFIG_SPARC64 - /* ba,pt %xcc, . + (off << 2) */ - val = 0x10680000 | ((u32) off >> 2); -#else - /* ba . + (off << 2) */ - val = 0x10800000 | ((u32) off >> 2); + if (off <= 0xfffff && off >= -0x100000) + use_v9_branch = true; #endif + if (use_v9_branch) { + /* WDISP19 - target is . + immed << 2 */ + /* ba,pt %xcc, . + off */ + val = 0x10680000 | (((u32) off >> 2) & 0x7ffff); + } else { + /* WDISP22 - target is . + immed << 2 */ + BUG_ON(off > 0x7fffff); + BUG_ON(off < -0x800000); + /* ba . + off */ + val = 0x10800000 | (((u32) off >> 2) & 0x3fffff); + } } else { val = 0x01000000; } diff --git a/kernel/arch/sparc/kernel/ktlb.S b/kernel/arch/sparc/kernel/ktlb.S index ef0d8e9e1..f22bec0db 100644 --- a/kernel/arch/sparc/kernel/ktlb.S +++ b/kernel/arch/sparc/kernel/ktlb.S @@ -20,6 +20,10 @@ kvmap_itlb: mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_IMMU, %g4 + /* The kernel executes in context zero, therefore we do not + * need to clear the context ID bits out of %g4 here. + */ + /* sun4v_itlb_miss branches here with the missing virtual * address already loaded into %g4 */ @@ -128,6 +132,10 @@ kvmap_dtlb: mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_DMMU, %g4 + /* The kernel executes in context zero, therefore we do not + * need to clear the context ID bits out of %g4 here. + */ + /* sun4v_dtlb_miss branches here with the missing virtual * address already loaded into %g4 */ @@ -251,6 +259,10 @@ kvmap_dtlb_longpath: nop .previous + /* The kernel executes in context zero, therefore we do not + * need to clear the context ID bits out of %g5 here. 
+ */ + be,pt %xcc, sparc64_realfault_common mov FAULT_CODE_DTLB, %g4 ba,pt %xcc, winfix_trampoline diff --git a/kernel/arch/sparc/kernel/misctrap.S b/kernel/arch/sparc/kernel/misctrap.S index 753b4f031..34b493390 100644 --- a/kernel/arch/sparc/kernel/misctrap.S +++ b/kernel/arch/sparc/kernel/misctrap.S @@ -18,8 +18,7 @@ __do_privact: 109: or %g7, %lo(109b), %g7 call do_privact add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __do_privact,.-__do_privact .type do_mna,#function @@ -46,8 +45,7 @@ do_mna: mov %l5, %o2 call mem_address_unaligned add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_mna,.-do_mna .type do_lddfmna,#function @@ -65,8 +63,7 @@ do_lddfmna: mov %l5, %o2 call handle_lddfmna add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_lddfmna,.-do_lddfmna .type do_stdfmna,#function @@ -84,8 +81,7 @@ do_stdfmna: mov %l5, %o2 call handle_stdfmna add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_stdfmna,.-do_stdfmna .type breakpoint_trap,#function diff --git a/kernel/arch/sparc/kernel/pci.c b/kernel/arch/sparc/kernel/pci.c index badf0951d..9f9614df9 100644 --- a/kernel/arch/sparc/kernel/pci.c +++ b/kernel/arch/sparc/kernel/pci.c @@ -994,6 +994,23 @@ void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } +#ifdef CONFIG_PCI_IOV +int pcibios_add_device(struct pci_dev *dev) +{ + struct pci_dev *pdev; + + /* Add sriov arch specific initialization here. 
+ * Copy dev_archdata from PF to VF + */ + if (dev->is_virtfn) { + pdev = dev->physfn; + memcpy(&dev->dev.archdata, &pdev->dev.archdata, + sizeof(struct dev_archdata)); + } + return 0; +} +#endif /* CONFIG_PCI_IOV */ + static int __init pcibios_init(void) { pci_dfl_cache_line_size = 64 >> 2; diff --git a/kernel/arch/sparc/kernel/rtrap_64.S b/kernel/arch/sparc/kernel/rtrap_64.S index d08bdaffd..216948ca4 100644 --- a/kernel/arch/sparc/kernel/rtrap_64.S +++ b/kernel/arch/sparc/kernel/rtrap_64.S @@ -14,10 +14,6 @@ #include #include -#define RTRAP_PSTATE (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE) -#define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV) -#define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG) - #ifdef CONFIG_CONTEXT_TRACKING # define SCHEDULE_USER schedule_user #else @@ -242,52 +238,17 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 wrpr %g1, %cwp ba,a,pt %xcc, user_rtt_fill_64bit -user_rtt_fill_fixup: - rdpr %cwp, %g1 - add %g1, 1, %g1 - wrpr %g1, 0x0, %cwp - - rdpr %wstate, %g2 - sll %g2, 3, %g2 - wrpr %g2, 0x0, %wstate - - /* We know %canrestore and %otherwin are both zero. 
*/ - - sethi %hi(sparc64_kern_pri_context), %g2 - ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2 - mov PRIMARY_CONTEXT, %g1 - -661: stxa %g2, [%g1] ASI_DMMU - .section .sun4v_1insn_patch, "ax" - .word 661b - stxa %g2, [%g1] ASI_MMU - .previous - - sethi %hi(KERNBASE), %g1 - flush %g1 +user_rtt_fill_fixup_dax: + ba,pt %xcc, user_rtt_fill_fixup_common + mov 1, %g3 - or %g4, FAULT_CODE_WINFIXUP, %g4 - stb %g4, [%g6 + TI_FAULT_CODE] - stx %g5, [%g6 + TI_FAULT_ADDR] +user_rtt_fill_fixup_mna: + ba,pt %xcc, user_rtt_fill_fixup_common + mov 2, %g3 - mov %g6, %l1 - wrpr %g0, 0x0, %tl - -661: nop - .section .sun4v_1insn_patch, "ax" - .word 661b - SET_GL(0) - .previous - - wrpr %g0, RTRAP_PSTATE, %pstate - - mov %l1, %g6 - ldx [%g6 + TI_TASK], %g4 - LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) - call do_sparc64_fault - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop +user_rtt_fill_fixup: + ba,pt %xcc, user_rtt_fill_fixup_common + clr %g3 user_rtt_pre_restore: add %g1, 1, %g1 diff --git a/kernel/arch/sparc/kernel/signal32.c b/kernel/arch/sparc/kernel/signal32.c index 4eed773a7..77655f0f0 100644 --- a/kernel/arch/sparc/kernel/signal32.c +++ b/kernel/arch/sparc/kernel/signal32.c @@ -138,12 +138,24 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) return 0; } +/* Checks if the fp is valid. We always build signal frames which are + * 16-byte aligned, therefore we can always enforce that the restore + * frame has that property as well. 
+ */ +static bool invalid_frame_pointer(void __user *fp, int fplen) +{ + if ((((unsigned long) fp) & 15) || + ((unsigned long)fp) > 0x100000000ULL - fplen) + return true; + return false; +} + void do_sigreturn32(struct pt_regs *regs) { struct signal_frame32 __user *sf; compat_uptr_t fpu_save; compat_uptr_t rwin_save; - unsigned int psr; + unsigned int psr, ufp; unsigned pc, npc; sigset_t set; compat_sigset_t seta; @@ -158,11 +170,16 @@ void do_sigreturn32(struct pt_regs *regs) sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP]; /* 1. Make sure we are not getting garbage from the user */ - if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) || - (((unsigned long) sf) & 3)) + if (invalid_frame_pointer(sf, sizeof(*sf))) + goto segv; + + if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP])) + goto segv; + + if (ufp & 0x7) goto segv; - if (get_user(pc, &sf->info.si_regs.pc) || + if (__get_user(pc, &sf->info.si_regs.pc) || __get_user(npc, &sf->info.si_regs.npc)) goto segv; @@ -227,7 +244,7 @@ segv: asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) { struct rt_signal_frame32 __user *sf; - unsigned int psr, pc, npc; + unsigned int psr, pc, npc, ufp; compat_uptr_t fpu_save; compat_uptr_t rwin_save; sigset_t set; @@ -242,11 +259,16 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) sf = (struct rt_signal_frame32 __user *) regs->u_regs[UREG_FP]; /* 1. 
Make sure we are not getting garbage from the user */ - if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) || - (((unsigned long) sf) & 3)) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv; - if (get_user(pc, &sf->regs.pc) || + if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) + goto segv; + + if (ufp & 0x7) + goto segv; + + if (__get_user(pc, &sf->regs.pc) || __get_user(npc, &sf->regs.npc)) goto segv; @@ -307,14 +329,6 @@ segv: force_sig(SIGSEGV, current); } -/* Checks if the fp is valid */ -static int invalid_frame_pointer(void __user *fp, int fplen) -{ - if ((((unsigned long) fp) & 7) || ((unsigned long)fp) > 0x100000000ULL - fplen) - return 1; - return 0; -} - static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) { unsigned long sp; diff --git a/kernel/arch/sparc/kernel/signal_32.c b/kernel/arch/sparc/kernel/signal_32.c index 52aa5e4ce..9c0c8fd0b 100644 --- a/kernel/arch/sparc/kernel/signal_32.c +++ b/kernel/arch/sparc/kernel/signal_32.c @@ -60,10 +60,22 @@ struct rt_signal_frame { #define SF_ALIGNEDSZ (((sizeof(struct signal_frame) + 7) & (~7))) #define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame) + 7) & (~7))) +/* Checks if the fp is valid. We always build signal frames which are + * 16-byte aligned, therefore we can always enforce that the restore + * frame has that property as well. + */ +static inline bool invalid_frame_pointer(void __user *fp, int fplen) +{ + if ((((unsigned long) fp) & 15) || !__access_ok((unsigned long)fp, fplen)) + return true; + + return false; +} + asmlinkage void do_sigreturn(struct pt_regs *regs) { + unsigned long up_psr, pc, npc, ufp; struct signal_frame __user *sf; - unsigned long up_psr, pc, npc; sigset_t set; __siginfo_fpu_t __user *fpu_save; __siginfo_rwin_t __user *rwin_save; @@ -77,10 +89,13 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) sf = (struct signal_frame __user *) regs->u_regs[UREG_FP]; /* 1. 
Make sure we are not getting garbage from the user */ - if (!access_ok(VERIFY_READ, sf, sizeof(*sf))) + if (invalid_frame_pointer(sf, sizeof(*sf))) + goto segv_and_exit; + + if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP])) goto segv_and_exit; - if (((unsigned long) sf) & 3) + if (ufp & 0x7) goto segv_and_exit; err = __get_user(pc, &sf->info.si_regs.pc); @@ -127,7 +142,7 @@ segv_and_exit: asmlinkage void do_rt_sigreturn(struct pt_regs *regs) { struct rt_signal_frame __user *sf; - unsigned int psr, pc, npc; + unsigned int psr, pc, npc, ufp; __siginfo_fpu_t __user *fpu_save; __siginfo_rwin_t __user *rwin_save; sigset_t set; @@ -135,8 +150,13 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) synchronize_user_stack(); sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP]; - if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) || - (((unsigned long) sf) & 0x03)) + if (invalid_frame_pointer(sf, sizeof(*sf))) + goto segv; + + if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) + goto segv; + + if (ufp & 0x7) goto segv; err = __get_user(pc, &sf->regs.pc); @@ -178,15 +198,6 @@ segv: force_sig(SIGSEGV, current); } -/* Checks if the fp is valid */ -static inline int invalid_frame_pointer(void __user *fp, int fplen) -{ - if ((((unsigned long) fp) & 7) || !__access_ok((unsigned long)fp, fplen)) - return 1; - - return 0; -} - static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) { unsigned long sp = regs->u_regs[UREG_FP]; diff --git a/kernel/arch/sparc/kernel/signal_64.c b/kernel/arch/sparc/kernel/signal_64.c index d88beff47..5ee930c48 100644 --- a/kernel/arch/sparc/kernel/signal_64.c +++ b/kernel/arch/sparc/kernel/signal_64.c @@ -52,7 +52,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs) unsigned char fenab; int err; - flush_user_windows(); + synchronize_user_stack(); if (get_thread_wsaved() || (((unsigned long)ucp) & (sizeof(unsigned long)-1)) || (!__access_ok(ucp, sizeof(*ucp)))) @@ -234,6 +234,17 @@ 
do_sigsegv: goto out; } +/* Checks if the fp is valid. We always build rt signal frames which + * are 16-byte aligned, therefore we can always enforce that the + * restore frame has that property as well. + */ +static bool invalid_frame_pointer(void __user *fp) +{ + if (((unsigned long) fp) & 15) + return true; + return false; +} + struct rt_signal_frame { struct sparc_stackf ss; siginfo_t info; @@ -246,8 +257,8 @@ struct rt_signal_frame { void do_rt_sigreturn(struct pt_regs *regs) { + unsigned long tpc, tnpc, tstate, ufp; struct rt_signal_frame __user *sf; - unsigned long tpc, tnpc, tstate; __siginfo_fpu_t __user *fpu_save; __siginfo_rwin_t __user *rwin_save; sigset_t set; @@ -261,10 +272,16 @@ void do_rt_sigreturn(struct pt_regs *regs) (regs->u_regs [UREG_FP] + STACK_BIAS); /* 1. Make sure we are not getting garbage from the user */ - if (((unsigned long) sf) & 3) + if (invalid_frame_pointer(sf)) + goto segv; + + if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) goto segv; - err = get_user(tpc, &sf->regs.tpc); + if ((ufp + STACK_BIAS) & 0x7) + goto segv; + + err = __get_user(tpc, &sf->regs.tpc); err |= __get_user(tnpc, &sf->regs.tnpc); if (test_thread_flag(TIF_32BIT)) { tpc &= 0xffffffff; @@ -308,14 +325,6 @@ segv: force_sig(SIGSEGV, current); } -/* Checks if the fp is valid */ -static int invalid_frame_pointer(void __user *fp) -{ - if (((unsigned long) fp) & 15) - return 1; - return 0; -} - static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) { unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS; diff --git a/kernel/arch/sparc/kernel/sigutil_32.c b/kernel/arch/sparc/kernel/sigutil_32.c index 0f6eebe71..e5fe8cef9 100644 --- a/kernel/arch/sparc/kernel/sigutil_32.c +++ b/kernel/arch/sparc/kernel/sigutil_32.c @@ -48,6 +48,10 @@ int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) { int err; + + if (((unsigned long) fpu) & 3) 
+ return -EFAULT; + #ifdef CONFIG_SMP if (test_tsk_thread_flag(current, TIF_USEDFPU)) regs->psr &= ~PSR_EF; @@ -97,7 +101,10 @@ int restore_rwin_state(__siginfo_rwin_t __user *rp) struct thread_info *t = current_thread_info(); int i, wsaved, err; - __get_user(wsaved, &rp->wsaved); + if (((unsigned long) rp) & 3) + return -EFAULT; + + get_user(wsaved, &rp->wsaved); if (wsaved > NSWINS) return -EFAULT; diff --git a/kernel/arch/sparc/kernel/sigutil_64.c b/kernel/arch/sparc/kernel/sigutil_64.c index 387834a9c..36aadcbea 100644 --- a/kernel/arch/sparc/kernel/sigutil_64.c +++ b/kernel/arch/sparc/kernel/sigutil_64.c @@ -37,7 +37,10 @@ int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) unsigned long fprs; int err; - err = __get_user(fprs, &fpu->si_fprs); + if (((unsigned long) fpu) & 7) + return -EFAULT; + + err = get_user(fprs, &fpu->si_fprs); fprs_write(0); regs->tstate &= ~TSTATE_PEF; if (fprs & FPRS_DL) @@ -72,7 +75,10 @@ int restore_rwin_state(__siginfo_rwin_t __user *rp) struct thread_info *t = current_thread_info(); int i, wsaved, err; - __get_user(wsaved, &rp->wsaved); + if (((unsigned long) rp) & 7) + return -EFAULT; + + get_user(wsaved, &rp->wsaved); if (wsaved > NSWINS) return -EFAULT; diff --git a/kernel/arch/sparc/kernel/sparc_ksyms_64.c b/kernel/arch/sparc/kernel/sparc_ksyms_64.c index a92d5d2c4..51b25325a 100644 --- a/kernel/arch/sparc/kernel/sparc_ksyms_64.c +++ b/kernel/arch/sparc/kernel/sparc_ksyms_64.c @@ -27,7 +27,6 @@ EXPORT_SYMBOL(__flushw_user); EXPORT_SYMBOL_GPL(real_hard_smp_processor_id); /* from head_64.S */ -EXPORT_SYMBOL(__ret_efault); EXPORT_SYMBOL(tlb_type); EXPORT_SYMBOL(sun4v_chip_type); EXPORT_SYMBOL(prom_root_node); diff --git a/kernel/arch/sparc/kernel/spiterrs.S b/kernel/arch/sparc/kernel/spiterrs.S index c357e40ff..4a73009f6 100644 --- a/kernel/arch/sparc/kernel/spiterrs.S +++ b/kernel/arch/sparc/kernel/spiterrs.S @@ -85,8 +85,7 @@ __spitfire_cee_trap_continue: ba,pt %xcc, etraptl1 rd %pc, %g7 - ba,pt %xcc, 2f - 
nop + ba,a,pt %xcc, 2f 1: ba,pt %xcc, etrap_irq rd %pc, %g7 @@ -100,8 +99,7 @@ __spitfire_cee_trap_continue: mov %l5, %o2 call spitfire_access_error add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_access_error,.-__spitfire_access_error /* This is the trap handler entry point for ECC correctable @@ -179,8 +177,7 @@ __spitfire_data_access_exception_tl1: mov %l5, %o2 call spitfire_data_access_exception_tl1 add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_data_access_exception_tl1,.-__spitfire_data_access_exception_tl1 .type __spitfire_data_access_exception,#function @@ -200,8 +197,7 @@ __spitfire_data_access_exception: mov %l5, %o2 call spitfire_data_access_exception add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_data_access_exception,.-__spitfire_data_access_exception .type __spitfire_insn_access_exception_tl1,#function @@ -220,8 +216,7 @@ __spitfire_insn_access_exception_tl1: mov %l5, %o2 call spitfire_insn_access_exception_tl1 add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_insn_access_exception_tl1,.-__spitfire_insn_access_exception_tl1 .type __spitfire_insn_access_exception,#function @@ -240,6 +235,5 @@ __spitfire_insn_access_exception: mov %l5, %o2 call spitfire_insn_access_exception add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_insn_access_exception,.-__spitfire_insn_access_exception diff --git a/kernel/arch/sparc/kernel/syscalls.S b/kernel/arch/sparc/kernel/syscalls.S index bb0008927..c4a1b5c40 100644 --- a/kernel/arch/sparc/kernel/syscalls.S +++ b/kernel/arch/sparc/kernel/syscalls.S @@ -158,7 +158,25 @@ linux_syscall_trace32: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. 
*/ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table32), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 srl %i0, 0, %o0 + lduw [%l7 + %l4], %l7 srl %i4, 0, %o4 srl %i1, 0, %o1 srl %i2, 0, %o2 @@ -170,7 +188,25 @@ linux_syscall_trace: add %sp, PTREGS_OFF, %o0 brnz,pn %o0, 3f mov -ENOSYS, %o0 + + /* Syscall tracing can modify the registers. */ + ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 + sethi %hi(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 + or %l7, %lo(sys_call_table64), %l7 + ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 + ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2 + ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3 + ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4 + ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5 + + cmp %g1, NR_syscalls + bgeu,pn %xcc, 3f + mov -ENOSYS, %o0 + + sll %g1, 2, %l4 mov %i0, %o0 + lduw [%l7 + %l4], %l7 mov %i1, %o1 mov %i2, %o2 mov %i3, %o3 diff --git a/kernel/arch/sparc/kernel/tsb.S b/kernel/arch/sparc/kernel/tsb.S index be98685c1..d568c8207 100644 --- a/kernel/arch/sparc/kernel/tsb.S +++ b/kernel/arch/sparc/kernel/tsb.S @@ -29,13 +29,17 @@ */ tsb_miss_dtlb: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g4 + srlx %g4, PAGE_SHIFT, %g4 ba,pt %xcc, tsb_miss_page_table_walk - ldxa [%g4] ASI_DMMU, %g4 + sllx %g4, PAGE_SHIFT, %g4 tsb_miss_itlb: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_IMMU, %g4 + srlx %g4, PAGE_SHIFT, %g4 ba,pt %xcc, tsb_miss_page_table_walk - ldxa [%g4] ASI_IMMU, %g4 + sllx %g4, PAGE_SHIFT, %g4 /* At this point we have: * %g1 -- PAGE_SIZE TSB entry address @@ -284,6 +288,10 @@ tsb_do_dtlb_fault: nop .previous + /* Clear context ID bits. 
*/ + srlx %g5, PAGE_SHIFT, %g5 + sllx %g5, PAGE_SHIFT, %g5 + be,pt %xcc, sparc64_realfault_common mov FAULT_CODE_DTLB, %g4 ba,pt %xcc, winfix_trampoline diff --git a/kernel/arch/sparc/kernel/urtt_fill.S b/kernel/arch/sparc/kernel/urtt_fill.S new file mode 100644 index 000000000..5604a2b05 --- /dev/null +++ b/kernel/arch/sparc/kernel/urtt_fill.S @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include + + .text + .align 8 + .globl user_rtt_fill_fixup_common +user_rtt_fill_fixup_common: + rdpr %cwp, %g1 + add %g1, 1, %g1 + wrpr %g1, 0x0, %cwp + + rdpr %wstate, %g2 + sll %g2, 3, %g2 + wrpr %g2, 0x0, %wstate + + /* We know %canrestore and %otherwin are both zero. */ + + sethi %hi(sparc64_kern_pri_context), %g2 + ldx [%g2 + %lo(sparc64_kern_pri_context)], %g2 + mov PRIMARY_CONTEXT, %g1 + +661: stxa %g2, [%g1] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %g2, [%g1] ASI_MMU + .previous + + sethi %hi(KERNBASE), %g1 + flush %g1 + + mov %g4, %l4 + mov %g5, %l5 + brnz,pn %g3, 1f + mov %g3, %l3 + + or %g4, FAULT_CODE_WINFIXUP, %g4 + stb %g4, [%g6 + TI_FAULT_CODE] + stx %g5, [%g6 + TI_FAULT_ADDR] +1: + mov %g6, %l1 + wrpr %g0, 0x0, %tl + +661: nop + .section .sun4v_1insn_patch, "ax" + .word 661b + SET_GL(0) + .previous + + wrpr %g0, RTRAP_PSTATE, %pstate + + mov %l1, %g6 + ldx [%g6 + TI_TASK], %g4 + LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) + + brnz,pn %l3, 1f + nop + + call do_sparc64_fault + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + +1: cmp %g3, 2 + bne,pn %xcc, 2f + nop + + sethi %hi(tlb_type), %g1 + lduw [%g1 + %lo(tlb_type)], %g1 + cmp %g1, 3 + bne,pt %icc, 1f + add %sp, PTREGS_OFF, %o0 + mov %l4, %o2 + call sun4v_do_mna + mov %l5, %o1 + ba,a,pt %xcc, rtrap +1: mov %l4, %o1 + mov %l5, %o2 + call mem_address_unaligned + nop + ba,a,pt %xcc, rtrap + +2: sethi %hi(tlb_type), %g1 + mov %l4, %o1 + lduw [%g1 + %lo(tlb_type)], %g1 + mov %l5, %o2 + cmp %g1, 3 + bne,pt %icc, 1f + add %sp, PTREGS_OFF, %o0 + call sun4v_data_access_exception + 
nop + ba,a,pt %xcc, rtrap + +1: call spitfire_data_access_exception + nop + ba,a,pt %xcc, rtrap diff --git a/kernel/arch/sparc/kernel/utrap.S b/kernel/arch/sparc/kernel/utrap.S index b7f0f3f3a..c731e8023 100644 --- a/kernel/arch/sparc/kernel/utrap.S +++ b/kernel/arch/sparc/kernel/utrap.S @@ -11,8 +11,7 @@ utrap_trap: /* %g3=handler,%g4=level */ mov %l4, %o1 call bad_trap add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap invoke_utrap: sllx %g3, 3, %g3 diff --git a/kernel/arch/sparc/kernel/vmlinux.lds.S b/kernel/arch/sparc/kernel/vmlinux.lds.S index f1a2f688b..4a41d412d 100644 --- a/kernel/arch/sparc/kernel/vmlinux.lds.S +++ b/kernel/arch/sparc/kernel/vmlinux.lds.S @@ -33,6 +33,10 @@ ENTRY(_start) jiffies = jiffies_64; #endif +#ifdef CONFIG_SPARC64 +ASSERT((swapper_tsb == 0x0000000000408000), "Error: sparc64 early assembler too large") +#endif + SECTIONS { #ifdef CONFIG_SPARC64 diff --git a/kernel/arch/sparc/kernel/winfixup.S b/kernel/arch/sparc/kernel/winfixup.S index 1e67ce958..855019a85 100644 --- a/kernel/arch/sparc/kernel/winfixup.S +++ b/kernel/arch/sparc/kernel/winfixup.S @@ -32,8 +32,7 @@ fill_fixup: rd %pc, %g7 call do_sparc64_fault add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap /* Be very careful about usage of the trap globals here. * You cannot touch %g5 as that has the fault information. diff --git a/kernel/arch/sparc/lib/GENcopy_from_user.S b/kernel/arch/sparc/lib/GENcopy_from_user.S index b7d0bd6b1..69a439fa2 100644 --- a/kernel/arch/sparc/lib/GENcopy_from_user.S +++ b/kernel/arch/sparc/lib/GENcopy_from_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2007 David S. 
Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/GENcopy_to_user.S b/kernel/arch/sparc/lib/GENcopy_to_user.S index 780550e1a..9947427ce 100644 --- a/kernel/arch/sparc/lib/GENcopy_to_user.S +++ b/kernel/arch/sparc/lib/GENcopy_to_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/GENmemcpy.S b/kernel/arch/sparc/lib/GENmemcpy.S index 89358ee94..059ea24ad 100644 --- a/kernel/arch/sparc/lib/GENmemcpy.S +++ b/kernel/arch/sparc/lib/GENmemcpy.S @@ -4,21 +4,18 @@ */ #ifdef __KERNEL__ +#include #define GLOBAL_SPARE %g7 #else #define GLOBAL_SPARE %g5 #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -45,6 +42,29 @@ .register %g3,#scratch .text + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(GEN_retl_o4_1) + add %o4, %o2, %o4 + retl + add %o4, 1, %o0 +ENDPROC(GEN_retl_o4_1) +ENTRY(GEN_retl_g1_8) + add %g1, %o2, %g1 + retl + add %g1, 8, %o0 +ENDPROC(GEN_retl_g1_8) +ENTRY(GEN_retl_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(GEN_retl_o2_4) +ENTRY(GEN_retl_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(GEN_retl_o2_1) +#endif + .align 64 .globl FUNC_NAME @@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1) + EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %g1 sub %o2, 
%g1, %o2 1: subcc %g1, 0x8, %g1 - EX_LD(LOAD(ldx, %o1, %g2)) - EX_ST(STORE(stx, %g2, %o0)) + EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8) + EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8) add %o1, 0x8, %o1 bne,pt %XCC, 1b add %o0, 0x8, %o0 @@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4) + EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1) + EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/kernel/arch/sparc/lib/Makefile b/kernel/arch/sparc/lib/Makefile index 3269b0234..4f2384a42 100644 --- a/kernel/arch/sparc/lib/Makefile +++ b/kernel/arch/sparc/lib/Makefile @@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o -lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o +lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o diff --git a/kernel/arch/sparc/lib/NG2copy_from_user.S b/kernel/arch/sparc/lib/NG2copy_from_user.S index d5242b8c4..b79a6998d 100644 --- a/kernel/arch/sparc/lib/NG2copy_from_user.S +++ b/kernel/arch/sparc/lib/NG2copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2007 David S. 
Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/NG2copy_to_user.S b/kernel/arch/sparc/lib/NG2copy_to_user.S index 4e962d993..dcec55f25 100644 --- a/kernel/arch/sparc/lib/NG2copy_to_user.S +++ b/kernel/arch/sparc/lib/NG2copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/NG2memcpy.S b/kernel/arch/sparc/lib/NG2memcpy.S index d5f585df2..c629dbd12 100644 --- a/kernel/arch/sparc/lib/NG2memcpy.S +++ b/kernel/arch/sparc/lib/NG2memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -32,21 +33,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -140,45 +137,110 @@ fsrc2 %x6, %f12; \ fsrc2 %x7, %f14; #define FREG_LOAD_1(base, x0) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)) + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1) #define FREG_LOAD_2(base, x0, x1) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), 
NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); #define FREG_LOAD_3(base, x0, x1, x2) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); #define FREG_LOAD_4(base, x0, x1, x2, x3) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + 
EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \ - EX_LD_FP(LOAD(ldd, base + 0x30, %x6)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1); .register %g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf +__restore_asi: + retl + wr %g0, ASI_AIUS, %asi +ENTRY(NG2_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(NG2_retl_o2) +ENTRY(NG2_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(NG2_retl_o2_plus_1) +ENTRY(NG2_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(NG2_retl_o2_plus_4) +ENTRY(NG2_retl_o2_plus_8) + ba,pt %xcc, __restore_asi + add %o2, 8, %o0 +ENDPROC(NG2_retl_o2_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_1) + add %o4, 1, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_1) +ENTRY(NG2_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_16) +ENTRY(NG2_retl_o2_plus_g1_fp) 
+ ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_64_fp) + add %g1, 64, %g1 + ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_plus_1) +ENTRY(NG2_retl_o2_and_7_plus_o4) + and %o2, 7, %o2 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4) +ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8) + and %o2, 7, %o2 + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8) +#endif + .align 64 .globl FUNC_NAME @@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 ! bytes to align dst sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop /* fall through for 0 < low bits < 8 */ 110: sub %o4, 64, %g2 - EX_LD_FP(LOAD_BLK(%g2, %f0)) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) + EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 120: sub %o4, 56, %g2 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + 
EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 130: sub %o4, 48, %g2 FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_6(f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 140: sub %o4, 40, %g2 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_5(f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 150: sub %o4, 32, %g2 FREG_LOAD_4(%g2, f0, f2, f4, f6) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_4(f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 160: sub %o4, 24, %g2 FREG_LOAD_3(%g2, f0, f2, f4) -1: 
EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_3(f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 170: sub %o4, 16, %g2 FREG_LOAD_2(%g2, f0, f2) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_2(f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 180: sub %o4, 8, %g2 FREG_LOAD_1(%g2, f0) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_1(f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop 190: -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) subcc %g1, 64, %g1 - EX_LD_FP(LOAD_BLK(%o4, %f0)) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64) add %o4, 64, %o4 bne,pt %xcc, 1b LOAD(prefetch, %o4 + 64, #one_read) @@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, %o4 and %o2, 0xf, %o2 1: subcc %o4, 0x10, %o4 - 
EX_LD(LOAD(ldx, %o1, %o5)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16) add %o1, 0x08, %o1 - EX_LD(LOAD(ldx, %o1, %g1)) + EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16) sub %o1, 0x08, %o1 - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, GLOBAL_SPARE andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2) sub GLOBAL_SPARE, %g1, GLOBAL_SPARE andn %o2, 0x7, %o4 sllx %g2, %g1, %g2 1: add %o1, 0x8, %o1 - EX_LD(LOAD(ldx, %o1, %g3)) + EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4) subcc %o4, 0x8, %o4 srlx %g3, GLOBAL_SPARE, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -506,8 +568,8 @@ 
FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/kernel/arch/sparc/lib/NG4copy_from_user.S b/kernel/arch/sparc/lib/NG4copy_from_user.S index 2e8ee7ad0..16a286c1a 100644 --- a/kernel/arch/sparc/lib/NG4copy_from_user.S +++ b/kernel/arch/sparc/lib/NG4copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2012 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x, y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/NG4copy_to_user.S b/kernel/arch/sparc/lib/NG4copy_to_user.S index be0bf4590..6b0276ffc 100644 --- a/kernel/arch/sparc/lib/NG4copy_to_user.S +++ b/kernel/arch/sparc/lib/NG4copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2012 David S. 
Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/NG4memcpy.S b/kernel/arch/sparc/lib/NG4memcpy.S index 8e13ee1f4..75bb93b14 100644 --- a/kernel/arch/sparc/lib/NG4memcpy.S +++ b/kernel/arch/sparc/lib/NG4memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -46,22 +47,19 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x +#define EX_ST_FP(x,y) x #endif -#ifndef EX_RETVAL -#define EX_RETVAL(x) x -#endif #ifndef LOAD #define LOAD(type,addr,dest) type [addr], dest @@ -94,6 +92,158 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_asi_fp: + VISExitHalf +__restore_asi: + retl + wr %g0, ASI_AIUS, %asi + +ENTRY(NG4_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(NG4_retl_o2) +ENTRY(NG4_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(NG4_retl_o2_plus_1) +ENTRY(NG4_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(NG4_retl_o2_plus_4) +ENTRY(NG4_retl_o2_plus_o5) + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5) +ENTRY(NG4_retl_o2_plus_o5_plus_4) + add %o5, 4, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_4) +ENTRY(NG4_retl_o2_plus_o5_plus_8) + add %o5, 8, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_8) +ENTRY(NG4_retl_o2_plus_o5_plus_16) + add %o5, 16, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, 
%o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_16) +ENTRY(NG4_retl_o2_plus_o5_plus_24) + add %o5, 24, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_24) +ENTRY(NG4_retl_o2_plus_o5_plus_32) + add %o5, 32, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_32) +ENTRY(NG4_retl_o2_plus_g1) + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1) +ENTRY(NG4_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_1) +ENTRY(NG4_retl_o2_plus_g1_plus_8) + add %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_8) +ENTRY(NG4_retl_o2_plus_o4) + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4) +ENTRY(NG4_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8) +ENTRY(NG4_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16) +ENTRY(NG4_retl_o2_plus_o4_plus_24) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24) +ENTRY(NG4_retl_o2_plus_o4_plus_32) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32) +ENTRY(NG4_retl_o2_plus_o4_plus_40) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40) +ENTRY(NG4_retl_o2_plus_o4_plus_48) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48) +ENTRY(NG4_retl_o2_plus_o4_plus_56) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56) +ENTRY(NG4_retl_o2_plus_o4_plus_64) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64) +ENTRY(NG4_retl_o2_plus_o4_fp) + ba,pt %xcc, __restore_asi_fp 
+ add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_16_fp) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_24_fp) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_32_fp) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_40_fp) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_48_fp) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_56_fp) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_64_fp) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp) +#endif .align 64 .globl FUNC_NAME @@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 51f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) LOAD(prefetch, %o1 + 0x080, #n_reads_strong) @@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, .Llarge_aligned sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 8, %o1 subcc %g1, 8, %g1 add %o0, 
8, %o0 bne,pt %icc, 1b - EX_ST(STORE(stx, %g2, %o0 - 0x08)) + EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8) .Llarge_aligned: /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ andn %o2, 0x3f, %o4 sub %o2, %o4, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4) add %o1, 0x40, %o1 - EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) - EX_ST(STORE_INIT(%g1, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64) + EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) - EX_ST(STORE_INIT(%g3, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48) + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) - EX_ST(STORE_INIT(%o5, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32) + EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g3, %o0)) + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8) add 
%o0, 0x08, %o0 bne,pt %icc, 1b LOAD(prefetch, %o1 + 0x200, #n_reads_strong) @@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %o4, %o2 alignaddr %o1, %g0, %g1 add %o1, %o4, %o1 - EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0)) -1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2)) + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4) +1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4)) - EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6)) - EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8)) - EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10)) - EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12)) - EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14)) + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0)) + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64) faligndata %f2, %f4, %f18 add %g1, 0x40, %g1 faligndata %f4, %f6, %f20 @@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST_FP(STORE(std, %f16, %o0 + 0x00)) - EX_ST_FP(STORE(std, %f18, %o0 + 0x08)) - EX_ST_FP(STORE(std, %f20, %o0 + 0x10)) - EX_ST_FP(STORE(std, %f22, %o0 + 0x18)) - EX_ST_FP(STORE(std, %f24, %o0 + 0x20)) - EX_ST_FP(STORE(std, %f26, %o0 + 0x28)) - EX_ST_FP(STORE(std, %f28, %o0 + 0x30)) - EX_ST_FP(STORE(std, %f30, %o0 + 0x38)) + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64) + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56) + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48) + EX_ST_FP(STORE(std, %f22, %o0 + 
0x18), NG4_retl_o2_plus_o4_plus_40) + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32) + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24) + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16) + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8) add %o0, 0x40, %o0 bne,pt %icc, 1b LOAD(prefetch, %g1 + 0x200, #n_reads_strong) @@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andncc %o2, 0x20 - 1, %o5 be,pn %icc, 2f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5) add %o1, 0x20, %o1 subcc %o5, 0x20, %o5 - EX_ST(STORE(stx, %g1, %o0 + 0x00)) - EX_ST(STORE(stx, %g2, %o0 + 0x08)) - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) - EX_ST(STORE(stx, %o4, %o0 + 0x18)) + EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32) + EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_16) + EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8) bne,pt %icc, 1b add %o0, 0x20, %o0 2: andcc %o2, 0x18, %o5 be,pt %icc, 3f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) add %o1, 0x08, %o1 add %o0, 0x08, %o0 subcc %o5, 0x08, %o5 bne,pt %icc, 1b - EX_ST(STORE(stx, %g1, %o0 - 0x08)) + EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8) 3: brz,pt %o2, .Lexit cmp %o2, 0x04 bl,pn %icc, .Ltiny nop - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2) add %o1, 0x04, %o1 add %o0, 0x04, %o0 subcc %o2, 0x04, %o2 bne,pn %icc, .Ltiny - EX_ST(STORE(stw, %g1, 
%o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4) ba,a,pt %icc, .Lexit .Lmedium_unaligned: /* First get dest 8 byte aligned. */ @@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 2f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 2: and %o1, 0x7, %g1 brz,pn %g1, .Lmedium_noprefetch @@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov 64, %g2 sub %g2, %g1, %g2 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2) sllx %o4, %g1, %o4 andn %o2, 0x08 - 1, %o5 sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5) add %o1, 0x08, %o1 subcc %o5, 0x08, %o5 srlx %g3, %g2, GLOBAL_SPARE or GLOBAL_SPARE, %o4, GLOBAL_SPARE - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8) add %o0, 0x08, %o0 bne,pt %icc, 1b sllx %g3, %g1, %o4 @@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %icc, .Lsmall_unaligned .Ltiny: - EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x00)) - EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x01)) - EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2) ba,pt %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x02)) + EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2) .Lsmall: andcc %g2, 0x3, %g0 @@ -360,22 +512,22 @@ FUNC_NAME: /* 
%o0=dst, %o1=src, %o2=len */ andn %o2, 0x4 - 1, %o5 sub %o2, %o5, %o2 1: - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) add %o1, 0x04, %o1 subcc %o5, 0x04, %o5 add %o0, 0x04, %o0 bne,pt %icc, 1b - EX_ST(STORE(stw, %g1, %o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4) brz,pt %o2, .Lexit nop ba,a,pt %icc, .Ltiny .Lsmall_unaligned: -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) add %o1, 1, %o1 add %o0, 1, %o0 subcc %o2, 1, %o2 bne,pt %icc, 1b - EX_ST(STORE(stb, %g1, %o0 - 0x01)) + EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) ba,a,pt %icc, .Lexit .size FUNC_NAME, .-FUNC_NAME diff --git a/kernel/arch/sparc/lib/NGcopy_from_user.S b/kernel/arch/sparc/lib/NGcopy_from_user.S index 5d1e4d1ac..9cd42fcbc 100644 --- a/kernel/arch/sparc/lib/NGcopy_from_user.S +++ b/kernel/arch/sparc/lib/NGcopy_from_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/NGcopy_to_user.S b/kernel/arch/sparc/lib/NGcopy_to_user.S index ff630dcb2..5c358afd4 100644 --- a/kernel/arch/sparc/lib/NGcopy_to_user.S +++ b/kernel/arch/sparc/lib/NGcopy_to_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2006, 2007 David S. 
Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/NGmemcpy.S b/kernel/arch/sparc/lib/NGmemcpy.S index 96a14caf6..d88c4ed50 100644 --- a/kernel/arch/sparc/lib/NGmemcpy.S +++ b/kernel/arch/sparc/lib/NGmemcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -27,15 +28,11 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -79,6 +76,93 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_asi: + wr %g0, ASI_AIUS, %asi + ret + restore +ENTRY(NG_ret_i2_plus_i4_plus_1) + add %i4, 1, %i4 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4_plus_1) +ENTRY(NG_ret_i2_plus_g1) + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1) +ENTRY(NG_ret_i2_plus_g1_minus_8) + sub %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_8) +ENTRY(NG_ret_i2_plus_g1_minus_16) + sub %g1, 16, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_16) +ENTRY(NG_ret_i2_plus_g1_minus_24) + sub %g1, 24, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_24) +ENTRY(NG_ret_i2_plus_g1_minus_32) + sub %g1, 32, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_32) +ENTRY(NG_ret_i2_plus_g1_minus_40) + sub %g1, 40, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_40) +ENTRY(NG_ret_i2_plus_g1_minus_48) + sub %g1, 48, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_48) +ENTRY(NG_ret_i2_plus_g1_minus_56) + sub %g1, 56, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 
+ENDPROC(NG_ret_i2_plus_g1_minus_56) +ENTRY(NG_ret_i2_plus_i4) + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4) +ENTRY(NG_ret_i2_plus_i4_minus_8) + sub %i4, 8, %i4 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4_minus_8) +ENTRY(NG_ret_i2_plus_8) + ba,pt %xcc, __restore_asi + add %i2, 8, %i0 +ENDPROC(NG_ret_i2_plus_8) +ENTRY(NG_ret_i2_plus_4) + ba,pt %xcc, __restore_asi + add %i2, 4, %i0 +ENDPROC(NG_ret_i2_plus_4) +ENTRY(NG_ret_i2_plus_1) + ba,pt %xcc, __restore_asi + add %i2, 1, %i0 +ENDPROC(NG_ret_i2_plus_1) +ENTRY(NG_ret_i2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_plus_1) +ENTRY(NG_ret_i2) + ba,pt %xcc, __restore_asi + mov %i2, %i0 +ENDPROC(NG_ret_i2) +ENTRY(NG_ret_i2_and_7_plus_i4) + and %i2, 7, %i2 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_and_7_plus_i4) +#endif + .align 64 .globl FUNC_NAME @@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %g0, %i4, %i4 ! 
bytes to align dst sub %i2, %i4, %i2 1: subcc %i4, 1, %i4 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1) add %i1, 1, %i1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ and %i4, 0x7, GLOBAL_SPARE sll GLOBAL_SPARE, 3, GLOBAL_SPARE mov 64, %i5 - EX_LD(LOAD_TWIN(%i1, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1) sub %i5, GLOBAL_SPARE, %i5 mov 16, %o4 mov 32, %o5 @@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ srlx WORD3, PRE_SHIFT, TMP; \ or WORD2, TMP, WORD2; -8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g2, %o0 + 0x00)) - EX_ST(STORE_INIT(%g3, %o0 + 0x08)) + EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g2, %o0 + 0x20)) - EX_ST(STORE_INIT(%g3, %o0 + 0x28)) + EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 
+ 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 8b @@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ba,pt %XCC, 60f add %i1, %i4, %i1 -9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g3, %o0 + 0x00)) - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) + EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%g2, %o0 + 0x18)) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g3, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) + EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%g2, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 9b @@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ * one twin load ahead, then add 8 back into source when * we finish the loop. 
*/ - EX_LD(LOAD_TWIN(%i1, %o4, %o5)) + EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1) mov 16, %o7 mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%o4, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) - EX_ST(STORE_INIT(%o5, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) - EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) + EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%o4, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! 
initializes cache line - EX_ST(STORE_INIT(%o5, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) + EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line + EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o4, %o0 + 0x20)) - EX_ST(STORE_INIT(%o5, %o0 + 0x28)) - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ andn %i2, 0xf, %i4 and %i2, 0xf, %i2 1: subcc %i4, 0x10, %i4 - EX_LD(LOAD(ldx, %i1, %o4)) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4) add %i1, 0x08, %i1 - EX_LD(LOAD(ldx, %i1, %g1)) + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4) sub %i1, 0x08, %i1 - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4) add %i1, 0x8, %i1 - EX_ST(STORE(stx, %g1, %i1 + %i3)) + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8) bgu,pt %XCC, 1b add %i1, 0x8, %i1 73: andcc %i2, 0x8, %g0 be,pt %XCC, 1f nop sub %i2, 0x8, %i2 - EX_LD(LOAD(ldx, %i1, %o4)) - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8) add %i1, 0x8, %i1 1: andcc %i2, 0x4, %g0 be,pt %XCC, 1f nop sub %i2, 0x4, %i2 - EX_LD(LOAD(lduw, %i1, %i5)) - 
EX_ST(STORE(stw, %i5, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4) add %i1, 0x4, %i1 1: cmp %i2, 0 be,pt %XCC, 85f @@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %i2, %g1, %i2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %i1, %i5)) - EX_ST(STORE(stb, %i5, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1) + EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1) bgu,pt %icc, 1b add %i1, 1, %i1 @@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 8: mov 64, %i3 andn %i1, 0x7, %i1 - EX_LD(LOAD(ldx, %i1, %g2)) + EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2) sub %i3, %g1, %i3 andn %i2, 0x7, %i4 sllx %g2, %g1, %g2 1: add %i1, 0x8, %i1 - EX_LD(LOAD(ldx, %i1, %g3)) + EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4) subcc %i4, 0x8, %i4 srlx %g3, %i3, %i5 or %i5, %g2, %i5 - EX_ST(STORE(stx, %i5, %o0)) + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 1: subcc %i2, 4, %i2 - EX_LD(LOAD(lduw, %i1, %g1)) - EX_ST(STORE(stw, %g1, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4) bgu,pt %XCC, 1b add %i1, 4, %i1 @@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ .align 32 90: subcc %i2, 1, %i2 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1) + EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1) bgu,pt %XCC, 90b add %i1, 1, %i1 ret diff --git a/kernel/arch/sparc/lib/U1copy_from_user.S b/kernel/arch/sparc/lib/U1copy_from_user.S index ecc5692fa..bb6ff7322 100644 --- a/kernel/arch/sparc/lib/U1copy_from_user.S +++ b/kernel/arch/sparc/lib/U1copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. 
Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/U1copy_to_user.S b/kernel/arch/sparc/lib/U1copy_to_user.S index 9eea392e4..ed92ce739 100644 --- a/kernel/arch/sparc/lib/U1copy_to_user.S +++ b/kernel/arch/sparc/lib/U1copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/U1memcpy.S b/kernel/arch/sparc/lib/U1memcpy.S index 3e6209ebb..f30d2ab2c 100644 --- a/kernel/arch/sparc/lib/U1memcpy.S +++ b/kernel/arch/sparc/lib/U1memcpy.S @@ -5,6 +5,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE g7 @@ -23,21 +24,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -78,53 +75,169 @@ faligndata %f7, %f8, %f60; \ faligndata %f8, %f9, %f62; -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ - EX_LD_FP(LOAD_BLK(%src, %fdest)); \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ - add %src, 0x40, %src; \ - subcc %len, 0x40, %len; \ - be,pn %xcc, jmptgt; \ - add %dest, 0x40, %dest; \ - -#define LOOP_CHUNK1(src, dest, len, branch_dest) \ - 
MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) -#define LOOP_CHUNK2(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) -#define LOOP_CHUNK3(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \ + EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ + add %src, 0x40, %src; \ + subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \ + be,pn %xcc, jmptgt; \ + add %dest, 0x40, %dest; \ + +#define LOOP_CHUNK1(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest) +#define LOOP_CHUNK2(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest) +#define LOOP_CHUNK3(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest) #define DO_SYNC membar #Sync; #define STORE_SYNC(dest, fsrc) \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ add %dest, 0x40, %dest; \ DO_SYNC #define STORE_JUMP(dest, fsrc, target) \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \ add %dest, 0x40, %dest; \ ba,pt %xcc, target; \ nop; -#define FINISH_VISCHUNK(dest, f0, f1, left) \ - subcc %left, 8, %left;\ - bl,pn %xcc, 95f; \ - faligndata %f0, %f1, %f48; \ - EX_ST_FP(STORE(std, %f48, %dest)); \ +#define FINISH_VISCHUNK(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ + faligndata %f0, %f1, %f48; \ + EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \ add %dest, 8, %dest; -#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ - subcc %left, 8, %left; \ - bl,pn %xcc, 95f; \ +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ fsrc2 %f0, %f1; -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ +#define UNEVEN_VISCHUNK(dest, f0, f1) \ + UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ ba,a,pt %xcc, 93f; .register 
%g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(U1_g1_1_fp) + VISExitHalf + add %g1, 1, %g1 + add %g1, %g2, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1_fp) +ENTRY(U1_g2_0_fp) + VISExitHalf + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_0_fp) +ENTRY(U1_g2_8_fp) + VISExitHalf + add %g2, 8, %g2 + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_8_fp) +ENTRY(U1_gs_0_fp) + VISExitHalf + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_0_fp) +ENTRY(U1_gs_80_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_80_fp) +ENTRY(U1_gs_40_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_40_fp) +ENTRY(U1_g3_0_fp) + VISExitHalf + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_0_fp) +ENTRY(U1_g3_8_fp) + VISExitHalf + add %g3, 8, %g3 + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_8_fp) +ENTRY(U1_o2_0_fp) + VISExitHalf + retl + mov %o2, %o0 +ENDPROC(U1_o2_0_fp) +ENTRY(U1_o2_1_fp) + VISExitHalf + retl + add %o2, 1, %o0 +ENDPROC(U1_o2_1_fp) +ENTRY(U1_gs_0) + VISExitHalf + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0) +ENTRY(U1_gs_8) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x8, %o0 +ENDPROC(U1_gs_8) +ENTRY(U1_gs_10) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x10, %o0 +ENDPROC(U1_gs_10) +ENTRY(U1_o2_0) + retl + mov %o2, %o0 +ENDPROC(U1_o2_0) +ENTRY(U1_o2_8) + retl + add %o2, 8, %o0 +ENDPROC(U1_o2_8) +ENTRY(U1_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(U1_o2_4) +ENTRY(U1_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(U1_o2_1) +ENTRY(U1_g1_0) + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_0) +ENTRY(U1_g1_1) + add %g1, 1, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1) +ENTRY(U1_gs_0_o2_adj) + and %o2, 7, %o2 + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0_o2_adj) +ENTRY(U1_gs_8_o2_adj) + and %o2, 7, %o2 + add 
%GLOBAL_SPARE, 8, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_8_o2_adj) +#endif + .align 64 .globl FUNC_NAME @@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp) + EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - EX_LD_FP(LOAD(ldd, %o1, %f4)) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %g1, %GLOBAL_SPARE, %g1 subcc %o2, %g3, %o2 - EX_LD_FP(LOAD_BLK(%o1, %f0)) + EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp) add %o1, 0x40, %o1 add %g1, %g3, %g1 - EX_LD_FP(LOAD_BLK(%o1, %f16)) + EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp) add %o1, 0x40, %o1 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE - EX_LD_FP(LOAD_BLK(%o1, %f32)) + EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp) add %o1, 0x40, %o1 /* There are 8 instances of the unrolled loop, @@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) 
FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f0, %f2, %f48 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) @@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 56f) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f2, %f4, %f48 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) @@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 57f) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f4, %f6, %f48 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) @@ -303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 58f) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f6, %f8, %f48 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) @@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 59f) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) 
FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f8, %f10, %f48 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) @@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 60f) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f10, %f12, %f48 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) @@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 61f) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f12, %f14, %f48 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) @@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 62f) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f14, %f16, %f48 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) @@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 
FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, 63f) -40: FINISH_VISCHUNK(o0, f0, f2, g3) -41: FINISH_VISCHUNK(o0, f2, f4, g3) -42: FINISH_VISCHUNK(o0, f4, f6, g3) -43: FINISH_VISCHUNK(o0, f6, f8, g3) -44: FINISH_VISCHUNK(o0, f8, f10, g3) -45: FINISH_VISCHUNK(o0, f10, f12, g3) -46: FINISH_VISCHUNK(o0, f12, f14, g3) -47: UNEVEN_VISCHUNK(o0, f14, f0, g3) -48: FINISH_VISCHUNK(o0, f16, f18, g3) -49: FINISH_VISCHUNK(o0, f18, f20, g3) -50: FINISH_VISCHUNK(o0, f20, f22, g3) -51: FINISH_VISCHUNK(o0, f22, f24, g3) -52: FINISH_VISCHUNK(o0, f24, f26, g3) -53: FINISH_VISCHUNK(o0, f26, f28, g3) -54: FINISH_VISCHUNK(o0, f28, f30, g3) -55: UNEVEN_VISCHUNK(o0, f30, f0, g3) -56: FINISH_VISCHUNK(o0, f32, f34, g3) -57: FINISH_VISCHUNK(o0, f34, f36, g3) -58: FINISH_VISCHUNK(o0, f36, f38, g3) -59: FINISH_VISCHUNK(o0, f38, f40, g3) -60: FINISH_VISCHUNK(o0, f40, f42, g3) -61: FINISH_VISCHUNK(o0, f42, f44, g3) -62: FINISH_VISCHUNK(o0, f44, f46, g3) -63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) - -93: EX_LD_FP(LOAD(ldd, %o1, %f2)) +40: FINISH_VISCHUNK(o0, f0, f2) +41: FINISH_VISCHUNK(o0, f2, f4) +42: FINISH_VISCHUNK(o0, f4, f6) +43: FINISH_VISCHUNK(o0, f6, f8) +44: FINISH_VISCHUNK(o0, f8, f10) +45: FINISH_VISCHUNK(o0, f10, f12) +46: FINISH_VISCHUNK(o0, f12, f14) +47: UNEVEN_VISCHUNK(o0, f14, f0) +48: FINISH_VISCHUNK(o0, f16, f18) +49: FINISH_VISCHUNK(o0, f18, f20) +50: FINISH_VISCHUNK(o0, f20, f22) +51: FINISH_VISCHUNK(o0, f22, f24) +52: FINISH_VISCHUNK(o0, f24, f26) +53: FINISH_VISCHUNK(o0, f26, f28) +54: FINISH_VISCHUNK(o0, f28, f30) +55: UNEVEN_VISCHUNK(o0, f30, f0) +56: FINISH_VISCHUNK(o0, f32, f34) +57: FINISH_VISCHUNK(o0, f34, f36) +58: FINISH_VISCHUNK(o0, f36, f38) +59: FINISH_VISCHUNK(o0, f38, f40) +60: FINISH_VISCHUNK(o0, f40, f42) +61: FINISH_VISCHUNK(o0, f42, f44) +62: FINISH_VISCHUNK(o0, f44, f46) +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0) + +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f0, %f2, %f8 - 
EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) bl,pn %xcc, 95f add %o0, 8, %o0 - EX_LD_FP(LOAD(ldd, %o1, %f0)) + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) bge,pt %xcc, 93b add %o0, 8, %o0 95: brz,pt %o2, 2f mov %g1, %o1 -1: EX_LD_FP(LOAD(ldub, %o1, %o3)) +1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp) add %o1, 1, %o1 subcc %o2, 1, %o2 - EX_ST_FP(STORE(stb, %o3, %o0)) + EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp) bne,pt %xcc, 1b add %o0, 1, %o0 @@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 72: andn %o2, 0xf, %GLOBAL_SPARE and %o2, 0xf, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0) subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) + EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0) sub %o2, 0x8, %o2 - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) + EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0) sub %o2, 0x4, %o2 - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %g1, %g1 sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1, %o5)) +1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0) subcc %g1, 1, %g1 - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len 
*/ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0) sub %o3, %g1, %o3 andn %o2, 0x7, %GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj) subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ bne,pn %XCC, 90f sub %o0, %o1, %o3 -1: EX_LD(LOAD(lduw, %o1, %g1)) +1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0) subcc %o2, 4, %o2 - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov EX_RETVAL(%o4), %o0 .align 32 -90: EX_LD(LOAD(ldub, %o1, %g1)) +90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0) subcc %o2, 1, %o2 - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/kernel/arch/sparc/lib/U3copy_from_user.S b/kernel/arch/sparc/lib/U3copy_from_user.S index 88ad73d86..db73010a1 100644 --- a/kernel/arch/sparc/lib/U3copy_from_user.S +++ b/kernel/arch/sparc/lib/U3copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. 
Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/U3copy_to_user.S b/kernel/arch/sparc/lib/U3copy_to_user.S index 845139d75..c4ee858e3 100644 --- a/kernel/arch/sparc/lib/U3copy_to_user.S +++ b/kernel/arch/sparc/lib/U3copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/kernel/arch/sparc/lib/U3memcpy.S b/kernel/arch/sparc/lib/U3memcpy.S index 491ee69e4..54f98706b 100644 --- a/kernel/arch/sparc/lib/U3memcpy.S +++ b/kernel/arch/sparc/lib/U3memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -22,21 +23,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -77,6 +74,87 @@ */ .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf + retl + nop +ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) + add %g1, 1, %g1 + add %g2, %g1, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) +ENTRY(U3_retl_o2_plus_g2_fp) + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 
+ENDPROC(U3_retl_o2_plus_g2_fp) +ENTRY(U3_retl_o2_plus_g2_plus_8_fp) + add %g2, 8, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_8_fp) +ENTRY(U3_retl_o2) + retl + mov %o2, %o0 +ENDPROC(U3_retl_o2) +ENTRY(U3_retl_o2_plus_1) + retl + add %o2, 1, %o0 +ENDPROC(U3_retl_o2_plus_1) +ENTRY(U3_retl_o2_plus_4) + retl + add %o2, 4, %o0 +ENDPROC(U3_retl_o2_plus_4) +ENTRY(U3_retl_o2_plus_8) + retl + add %o2, 8, %o0 +ENDPROC(U3_retl_o2_plus_8) +ENTRY(U3_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + retl + add %o2, %g1, %o0 +ENDPROC(U3_retl_o2_plus_g1_plus_1) +ENTRY(U3_retl_o2_fp) + ba,pt %xcc, __restore_fp + mov %o2, %o0 +ENDPROC(U3_retl_o2_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) + sll %o3, 6, %o3 + add %o3, 0x80, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) + sll %o3, 6, %o3 + add %o3, 0x40, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) +ENTRY(U3_retl_o2_plus_GS_plus_0x10) + add GLOBAL_SPARE, 0x10, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_plus_GS_plus_0x10) +ENTRY(U3_retl_o2_plus_GS_plus_0x08) + add GLOBAL_SPARE, 0x08, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_plus_GS_plus_0x08) +ENTRY(U3_retl_o2_and_7_plus_GS) + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, %o0 /* (len & 7) + GLOBAL_SPARE goes to %o0, like the other handlers */ +ENDPROC(U3_retl_o2_and_7_plus_GS) +ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) + add GLOBAL_SPARE, 8, GLOBAL_SPARE + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, %o0 /* (len & 7) + GLOBAL_SPARE + 8 goes to %o0, like the other handlers */ +ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) +#endif + .align 64 /* The cheetah's flexible spine, oversized liver, enlarged heart, @@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1) +
EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - EX_LD_FP(LOAD(ldd, %o1, %f4)) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f2 - EX_ST_FP(STORE(std, %f2, %o0)) + EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ LOAD(prefetch, %o1 + 0x080, #one_read) LOAD(prefetch, %o1 + 0x0c0, #one_read) LOAD(prefetch, %o1 + 0x100, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2) LOAD(prefetch, %o1 + 0x140, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2) LOAD(prefetch, %o1 + 0x180, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2) faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2) faligndata %f8, %f10, %f24 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + 
EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2) faligndata %f10, %f12, %f26 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2) subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE add %o1, 0x40, %o1 @@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 add %o0, 0x40, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) subcc %o3, 0x01, %o3 faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f8, %f10, %f24 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f10, %f12, %f26 bg,pt %XCC, 1b @@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ /* Finally we copy the last full 64-byte block. 
*/ 2: - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f8, %f10, %f24 cmp %g1, 0 be,pt %XCC, 1f add %o0, 0x40, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40) 1: faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40) add %o0, 0x40, %o0 add %o1, 0x40, %o1 membar #Sync @@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g2, %o2 be,a,pt %XCC, 1f - EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2)) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f0, %f2, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %XCC, 2f add %o0, 0x8, 
%o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) bne,pn %XCC, 1b add %o0, 0x8, %o0 @@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andcc %o2, 0x8, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x8, %o1 + sub %o2, 8, %o2 1: andcc %o2, 0x4, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x4, %o1 + sub %o2, 4, %o2 1: andcc %o2, 0x2, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduh, %o1, %o5)) - EX_ST(STORE(sth, %o5, %o1 + %o3)) + EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2) + EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x2, %o1 + sub %o2, 2, %o2 1: andcc %o2, 0x1, %g0 be,pt %icc, 85f nop - EX_LD(LOAD(ldub, %o1, %o5)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2) ba,pt %xcc, 85f - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2) .align 64 70: /* 16 < len <= 64 */ @@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, GLOBAL_SPARE and %o2, 0xf, %o2 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE - EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, 
%o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2) sub %o3, %g1, %o3 andn %o2, 0x7, GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS) subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/kernel/arch/sparc/lib/copy_in_user.S 
b/kernel/arch/sparc/lib/copy_in_user.S index 302c0e60d..4c89b486f 100644 --- a/kernel/arch/sparc/lib/copy_in_user.S +++ b/kernel/arch/sparc/lib/copy_in_user.S @@ -8,18 +8,33 @@ #define XCC xcc -#define EX(x,y) \ +#define EX(x,y,z) \ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, z; \ .text; \ .align 4; +#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8) +#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4) +#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1) + .register %g2,#scratch .register %g3,#scratch .text +__retl_o4_plus_8: + add %o4, %o2, %o4 + retl + add %o4, 8, %o0 +__retl_o2_plus_4: + retl + add %o2, 4, %o0 +__retl_o2_plus_1: + retl + add %o2, 1, %o0 + .align 32 /* Don't try to get too fancy here, just nice and @@ -44,8 +59,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %o4 and %o2, 0x7, %o2 1: subcc %o4, 0x8, %o4 - EX(ldxa [%o1] %asi, %o5) - EX(stxa %o5, [%o0] %asi) + EX_O4(ldxa [%o1] %asi, %o5) + EX_O4(stxa %o5, [%o0] %asi) add %o1, 0x8, %o1 bgu,pt %XCC, 1b add %o0, 0x8, %o0 @@ -53,8 +68,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX(lduwa [%o1] %asi, %o5) - EX(stwa %o5, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %o5) + EX_O2_4(stwa %o5, [%o0] %asi) add %o1, 0x4, %o1 add %o0, 0x4, %o0 1: cmp %o2, 0 @@ -70,8 +85,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ 82: subcc %o2, 4, %o2 - EX(lduwa [%o1] %asi, %g1) - EX(stwa %g1, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %g1) + EX_O2_4(stwa %g1, [%o0] %asi) add %o1, 4, %o1 bgu,pt %XCC, 82b add %o0, 4, %o0 @@ -82,8 +97,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX(lduba [%o1] %asi, %g1) - EX(stba %g1, [%o0] %asi) + EX_O2_1(lduba [%o1] %asi, %g1) + EX_O2_1(stba %g1, [%o0] %asi) add %o1, 1, %o1 bgu,pt %XCC, 90b add %o0, 1, %o0 diff --git a/kernel/arch/sparc/lib/user_fixup.c b/kernel/arch/sparc/lib/user_fixup.c deleted file mode 100644 index 
ac96ae236..000000000 --- a/kernel/arch/sparc/lib/user_fixup.c +++ /dev/null @@ -1,71 +0,0 @@ -/* user_fixup.c: Fix up user copy faults. - * - * Copyright (C) 2004 David S. Miller - */ - -#include -#include -#include -#include -#include - -#include - -/* Calculating the exact fault address when using - * block loads and stores can be very complicated. - * - * Instead of trying to be clever and handling all - * of the cases, just fix things up simply here. - */ - -static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long end = start + size; - - if (fault_addr < start || fault_addr >= end) { - *offset = 0; - } else { - *offset = fault_addr - start; - size = end - fault_addr; - } - return size; -} - -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) -{ - unsigned long offset; - - size = compute_size((unsigned long) from, size, &offset); - if (likely(size)) - memset(to + offset, 0, size); - - return size; -} -EXPORT_SYMBOL(copy_from_user_fixup); - -unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) -{ - unsigned long offset; - - return compute_size((unsigned long) to, size, &offset); -} -EXPORT_SYMBOL(copy_to_user_fixup); - -unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long start = (unsigned long) to; - unsigned long end = start + size; - - if (fault_addr >= start && fault_addr < end) - return end - fault_addr; - - start = (unsigned long) from; - end = start + size; - if (fault_addr >= start && fault_addr < end) - return end - fault_addr; - - return size; -} -EXPORT_SYMBOL(copy_in_user_fixup); diff --git a/kernel/arch/sparc/mm/fault_64.c b/kernel/arch/sparc/mm/fault_64.c index dbabe5713..e15f33715 100644 --- a/kernel/arch/sparc/mm/fault_64.c +++ 
b/kernel/arch/sparc/mm/fault_64.c @@ -479,14 +479,14 @@ good_area: up_read(&mm->mmap_sem); mm_rss = get_mm_rss(mm); -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) + mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE)); #endif if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - mm_rss = mm->context.huge_pte_count; + mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { if (mm->context.tsb_block[MM_TSB_HUGE].tsb) diff --git a/kernel/arch/sparc/mm/hugetlbpage.c b/kernel/arch/sparc/mm/hugetlbpage.c index 131eaf4ad..da1142401 100644 --- a/kernel/arch/sparc/mm/hugetlbpage.c +++ b/kernel/arch/sparc/mm/hugetlbpage.c @@ -176,17 +176,31 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { int i; + pte_t orig[2]; + unsigned long nptes; if (!pte_present(*ptep) && pte_present(entry)) - mm->context.huge_pte_count++; + mm->context.hugetlb_pte_count++; addr &= HPAGE_MASK; - for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - set_pte_at(mm, addr, ptep, entry); + + nptes = 1 << HUGETLB_PAGE_ORDER; + orig[0] = *ptep; + orig[1] = *(ptep + nptes / 2); + for (i = 0; i < nptes; i++) { + *ptep = entry; ptep++; addr += PAGE_SIZE; pte_val(entry) += PAGE_SIZE; } + + /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ + addr -= REAL_HPAGE_SIZE; + ptep -= nptes / 2; + maybe_tlb_batch_add(mm, addr, ptep, orig[1], 0); + addr -= REAL_HPAGE_SIZE; + ptep -= nptes / 2; + maybe_tlb_batch_add(mm, addr, ptep, orig[0], 0); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, @@ -194,19 +208,28 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, { pte_t 
entry; int i; + unsigned long nptes; entry = *ptep; if (pte_present(entry)) - mm->context.huge_pte_count--; + mm->context.hugetlb_pte_count--; addr &= HPAGE_MASK; - - for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { - pte_clear(mm, addr, ptep); + nptes = 1 << HUGETLB_PAGE_ORDER; + for (i = 0; i < nptes; i++) { + *ptep = __pte(0UL); addr += PAGE_SIZE; ptep++; } + /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ + addr -= REAL_HPAGE_SIZE; + ptep -= nptes / 2; + maybe_tlb_batch_add(mm, addr, ptep, entry, 0); + addr -= REAL_HPAGE_SIZE; + ptep -= nptes / 2; + maybe_tlb_batch_add(mm, addr, ptep, entry, 0); + return entry; } diff --git a/kernel/arch/sparc/mm/init_64.c b/kernel/arch/sparc/mm/init_64.c index 3025bd57f..3d3414c14 100644 --- a/kernel/arch/sparc/mm/init_64.c +++ b/kernel/arch/sparc/mm/init_64.c @@ -324,18 +324,6 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde tsb_insert(tsb, tag, tte); } -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) -static inline bool is_hugetlb_pte(pte_t pte) -{ - if ((tlb_type == hypervisor && - (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || - (tlb_type != hypervisor && - (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) - return true; - return false; -} -#endif - void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { struct mm_struct *mm; @@ -358,7 +346,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * spin_lock_irqsave(&mm->context.lock, flags); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) + if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) && + is_hugetlb_pte(pte)) __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, address, pte_val(pte)); else @@ -811,8 +800,10 @@ struct mdesc_mblock { }; static struct mdesc_mblock *mblocks; static int num_mblocks; +static int 
find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask); -static unsigned long ra_to_pa(unsigned long addr) +static unsigned long __init ra_to_pa(unsigned long addr) { int i; @@ -828,8 +819,11 @@ static unsigned long ra_to_pa(unsigned long addr) return addr; } -static int find_node(unsigned long addr) +static int __init find_node(unsigned long addr) { + static bool search_mdesc = true; + static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; + static int last_index; int i; addr = ra_to_pa(addr); @@ -839,13 +833,30 @@ static int find_node(unsigned long addr) if ((addr & p->mask) == p->val) return i; } - /* The following condition has been observed on LDOM guests.*/ - WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" - " rule. Some physical memory will be owned by node 0."); - return 0; + /* The following condition has been observed on LDOM guests because + * node_masks only contains the best latency mask and value. + * LDOM guest's mdesc can contain a single latency group to + * cover multiple address range. Print warning message only if the + * address cannot be found in node_masks nor mdesc. + */ + if ((search_mdesc) && + ((addr & last_mem_mask.mask) != last_mem_mask.val)) { + /* find the available node in the mdesc */ + last_index = find_numa_node_for_addr(addr, &last_mem_mask); + numadbg("find_node: latency group for address 0x%lx is %d\n", + addr, last_index); + if ((last_index < 0) || (last_index >= num_node_masks)) { + /* WARN_ONCE() and use default group 0 */ + WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. 
Some physical memory will be owned by node 0."); + search_mdesc = false; + last_index = 0; + } + } + + return last_index; } -static u64 memblock_nid_range(u64 start, u64 end, int *nid) +static u64 __init memblock_nid_range(u64 start, u64 end, int *nid) { *nid = find_node(start); start += PAGE_SIZE; @@ -1169,6 +1180,41 @@ int __node_distance(int from, int to) return numa_latency[from][to]; } +static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask) +{ + struct mdesc_handle *md = mdesc_grab(); + u64 node, arc; + int i = 0; + + node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); + if (node == MDESC_NODE_NULL) + goto out; + + mdesc_for_each_node_by_name(md, node, "group") { + mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + struct mdesc_mlgroup *m = find_mlgroup(target); + + if (!m) + continue; + if ((pa & m->mask) == m->match) { + if (pnode_mask) { + pnode_mask->mask = m->mask; + pnode_mask->val = m->match; + } + mdesc_release(md); + return i; + } + } + i++; + } + +out: + mdesc_release(md); + return -1; +} + static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { int i; @@ -1267,13 +1313,6 @@ static int __init numa_parse_mdesc(void) int i, j, err, count; u64 node; - /* Some sane defaults for numa latency values */ - for (i = 0; i < MAX_NUMNODES; i++) { - for (j = 0; j < MAX_NUMNODES; j++) - numa_latency[i][j] = (i == j) ? - LOCAL_DISTANCE : REMOTE_DISTANCE; - } - node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); if (node == MDESC_NODE_NULL) { mdesc_release(md); @@ -1369,10 +1408,18 @@ static int __init numa_parse_sun4u(void) static int __init bootmem_init_numa(void) { + int i, j; int err = -1; numadbg("bootmem_init_numa()\n"); + /* Some sane defaults for numa latency values */ + for (i = 0; i < MAX_NUMNODES; i++) { + for (j = 0; j < MAX_NUMNODES; j++) + numa_latency[i][j] = (i == j) ? 
+ LOCAL_DISTANCE : REMOTE_DISTANCE; + } + if (numa_enabled) { if (tlb_type == hypervisor) err = numa_parse_mdesc(); @@ -2832,9 +2879,10 @@ void hugetlb_setup(struct pt_regs *regs) * the Data-TLB for huge pages. */ if (tlb_type == cheetah_plus) { + bool need_context_reload = false; unsigned long ctx; - spin_lock(&ctx_alloc_lock); + spin_lock_irq(&ctx_alloc_lock); ctx = mm->context.sparc64_ctx_val; ctx &= ~CTX_PGSZ_MASK; ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT; @@ -2853,9 +2901,12 @@ void hugetlb_setup(struct pt_regs *regs) * also executing in this address space. */ mm->context.sparc64_ctx_val = ctx; - on_each_cpu(context_reload, mm, 0); + need_context_reload = true; } - spin_unlock(&ctx_alloc_lock); + spin_unlock_irq(&ctx_alloc_lock); + + if (need_context_reload) + on_each_cpu(context_reload, mm, 0); } } #endif diff --git a/kernel/arch/sparc/mm/tlb.c b/kernel/arch/sparc/mm/tlb.c index 9df2190c0..3659d37b4 100644 --- a/kernel/arch/sparc/mm/tlb.c +++ b/kernel/arch/sparc/mm/tlb.c @@ -67,7 +67,7 @@ void arch_leave_lazy_mmu_mode(void) } static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, - bool exec) + bool exec, bool huge) { struct tlb_batch *tb = &get_cpu_var(tlb_batch); unsigned long nr; @@ -84,13 +84,21 @@ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, } if (!tb->active) { - flush_tsb_user_page(mm, vaddr); + flush_tsb_user_page(mm, vaddr, huge); global_flush_tlb_page(mm, vaddr); goto out; } - if (nr == 0) + if (nr == 0) { tb->mm = mm; + tb->huge = huge; + } + + if (tb->huge != huge) { + flush_tlb_pending(); + tb->huge = huge; + nr = 0; + } tb->vaddrs[nr] = vaddr; tb->tlb_nr = ++nr; @@ -104,6 +112,8 @@ out: void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig, int fullmm) { + bool huge = is_hugetlb_pte(orig); + if (tlb_type != hypervisor && pte_dirty(orig)) { unsigned long paddr, pfn = pte_pfn(orig); @@ -129,7 +139,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, 
no_cache_flush: if (!fullmm) - tlb_batch_add_one(mm, vaddr, pte_exec(orig)); + tlb_batch_add_one(mm, vaddr, pte_exec(orig), huge); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -145,7 +155,7 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, if (pte_val(*pte) & _PAGE_VALID) { bool exec = pte_exec(*pte); - tlb_batch_add_one(mm, vaddr, exec); + tlb_batch_add_one(mm, vaddr, exec, false); } pte++; vaddr += PAGE_SIZE; @@ -165,9 +175,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { if (pmd_val(pmd) & _PAGE_PMD_HUGE) - mm->context.huge_pte_count++; + mm->context.thp_pte_count++; else - mm->context.huge_pte_count--; + mm->context.thp_pte_count--; /* Do not try to allocate the TSB hash table if we * don't have one already. We have various locks held @@ -185,8 +195,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, pte_t orig_pte = __pte(pmd_val(orig)); bool exec = pte_exec(orig_pte); - tlb_batch_add_one(mm, addr, exec); - tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec); + tlb_batch_add_one(mm, addr, exec, true); + tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec, + true); } else { tlb_batch_pmd_scan(mm, addr, orig); } diff --git a/kernel/arch/sparc/mm/tsb.c b/kernel/arch/sparc/mm/tsb.c index a06576683..9cdeca0fa 100644 --- a/kernel/arch/sparc/mm/tsb.c +++ b/kernel/arch/sparc/mm/tsb.c @@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr) return (tag == (vaddr >> 22)); } +static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end) +{ + unsigned long idx; + + for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) { + struct tsb *ent = &swapper_tsb[idx]; + unsigned long match = idx << 13; + + match |= (ent->tag << 22); + if (match >= start && match < end) + ent->tag = (1UL << TSB_TAG_INVALID_BIT); + } +} + /* TSB flushes need only occur on the processor initiating the address * space modification, not on each cpu the address 
space has run on. * Only the TLB flush needs that treatment. @@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) { unsigned long v; + if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES) + return flush_tsb_kernel_range_scan(start, end); + for (v = start; v < end; v += PAGE_SIZE) { unsigned long hash = tsb_hash(v, PAGE_SHIFT, KERNEL_TSB_NENTRIES); @@ -76,14 +93,15 @@ void flush_tsb_user(struct tlb_batch *tb) spin_lock_irqsave(&mm->context.lock, flags); - base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; - nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; - if (tlb_type == cheetah_plus || tlb_type == hypervisor) - base = __pa(base); - __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); - + if (!tb->huge) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; + nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); + } #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { + if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) { base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) @@ -94,20 +112,21 @@ void flush_tsb_user(struct tlb_batch *tb) spin_unlock_irqrestore(&mm->context.lock, flags); } -void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge) { unsigned long nentries, base, flags; spin_lock_irqsave(&mm->context.lock, flags); - base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; - nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; - if (tlb_type == cheetah_plus || tlb_type == hypervisor) - base = __pa(base); - __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, 
nentries); - + if (!huge) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; + nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries); + } #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { + if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) { base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) @@ -468,7 +487,7 @@ retry_tsb_alloc: int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - unsigned long huge_pte_count; + unsigned long total_huge_pte_count; #endif unsigned int i; @@ -477,12 +496,14 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm->context.sparc64_ctx_val = 0UL; #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - /* We reset it to zero because the fork() page copying + /* We reset them to zero because the fork() page copying * will re-increment the counters as the parent PTEs are * copied into the child address space. 
*/ - huge_pte_count = mm->context.huge_pte_count; - mm->context.huge_pte_count = 0; + total_huge_pte_count = mm->context.hugetlb_pte_count + + mm->context.thp_pte_count; + mm->context.hugetlb_pte_count = 0; + mm->context.thp_pte_count = 0; #endif /* copy_mm() copies over the parent's mm_struct before calling @@ -498,8 +519,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (unlikely(huge_pte_count)) - tsb_grow(mm, MM_TSB_HUGE, huge_pte_count); + if (unlikely(total_huge_pte_count)) + tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count); #endif if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) diff --git a/kernel/arch/sparc/mm/ultra.S b/kernel/arch/sparc/mm/ultra.S index b4f4733ab..5d2fd6cd3 100644 --- a/kernel/arch/sparc/mm/ultra.S +++ b/kernel/arch/sparc/mm/ultra.S @@ -30,7 +30,7 @@ .text .align 32 .globl __flush_tlb_mm -__flush_tlb_mm: /* 18 insns */ +__flush_tlb_mm: /* 19 insns */ /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ ldxa [%o1] ASI_DMMU, %g2 cmp %g2, %o0 @@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */ .align 32 .globl __flush_tlb_pending -__flush_tlb_pending: /* 26 insns */ +__flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 sllx %o1, 3, %o1 @@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */ .align 32 .globl __flush_tlb_kernel_range -__flush_tlb_kernel_range: /* 16 insns */ +__flush_tlb_kernel_range: /* 31 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f + sub %o1, %o0, %o3 + srlx %o3, 18, %o4 + brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow sethi %hi(PAGE_SIZE), %o4 - sub %o1, %o0, %o3 sub %o3, %o4, %o3 or %o0, 0x20, %o0 ! 
Nucleus 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP @@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */ retl nop nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + +__spitfire_flush_tlb_kernel_range_slow: + mov 63 * 8, %o4 +1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3 + andcc %o3, 0x40, %g0 /* _PAGE_L_4U */ + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %o3 + stxa %g0, [%o3] ASI_IMMU + stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS + membar #Sync +2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3 + andcc %o3, 0x40, %g0 + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %o3 + stxa %g0, [%o3] ASI_DMMU + stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS + membar #Sync +2: sub %o4, 8, %o4 + brgez,pt %o4, 1b + nop + retl + nop __spitfire_flush_tlb_mm_slow: rdpr %pstate, %g1 @@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */ retl wrpr %g7, 0x0, %pstate +__cheetah_flush_tlb_kernel_range: /* 31 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f + sub %o1, %o0, %o3 + srlx %o3, 18, %o4 + brnz,pn %o4, 3f + sethi %hi(PAGE_SIZE), %o4 + sub %o3, %o4, %o3 + or %o0, 0x20, %o0 ! 
Nucleus +1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP + stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %o3, 1b + sub %o3, %o4, %o3 +2: sethi %hi(KERNBASE), %o3 + flush %o3 + retl + nop +3: mov 0x80, %o4 + stxa %g0, [%o4] ASI_DMMU_DEMAP + membar #Sync + stxa %g0, [%o4] ASI_IMMU_DEMAP + membar #Sync + retl + nop + nop + nop + nop + nop + nop + nop + nop + #ifdef DCACHE_ALIASING_POSSIBLE __cheetah_flush_dcache_page: /* 11 insns */ sethi %hi(PAGE_OFFSET), %g1 @@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error: ret restore -__hypervisor_flush_tlb_mm: /* 10 insns */ +__hypervisor_flush_tlb_mm: /* 19 insns */ mov %o0, %o2 /* ARG2: mmu context */ mov 0, %o0 /* ARG0: CPU lists unimplemented */ mov 0, %o1 /* ARG1: CPU lists unimplemented */ mov HV_MMU_ALL, %o3 /* ARG3: flags */ mov HV_FAST_MMU_DEMAP_CTX, %o5 ta HV_FAST_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_FAST_MMU_DEMAP_CTX, %o1 retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o5 + jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_page: /* 11 insns */ +__hypervisor_flush_tlb_page: /* 22 insns */ /* %o0 = context, %o1 = vaddr */ mov %o0, %g2 mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ @@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */ srlx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_pending: /* 16 insns */ +__hypervisor_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ sllx %o1, 3, %g1 mov %o2, %g2 @@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */ srlx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, 
__hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 brnz,pt %g1, 1b nop retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_kernel_range: /* 16 insns */ +__hypervisor_flush_tlb_kernel_range: /* 31 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f - sethi %hi(PAGE_SIZE), %g3 - mov %o0, %g1 - sub %o1, %g1, %g2 + sub %o1, %o0, %g2 + srlx %g2, 18, %g3 + brnz,pn %g3, 4f + mov %o0, %g1 + sethi %hi(PAGE_SIZE), %g3 sub %g2, %g3, %g2 1: add %g1, %g2, %o0 /* ARG0: virtual address */ mov 0, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 3f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 brnz,pt %g2, 1b sub %g2, %g3, %g2 2: retl nop +3: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop +4: mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov 0, %o2 /* ARG2: mmu context == nucleus */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, 3b + mov HV_FAST_MMU_DEMAP_CTX, %o1 + retl + nop #ifdef DCACHE_ALIASING_POSSIBLE /* XXX Niagara and friends have an 8K cache, so no aliasing is @@ -394,43 +511,6 @@ tlb_patch_one: retl nop - .globl cheetah_patch_cachetlbops -cheetah_patch_cachetlbops: - save %sp, -128, %sp - - sethi %hi(__flush_tlb_mm), %o0 - or %o0, %lo(__flush_tlb_mm), %o0 - sethi %hi(__cheetah_flush_tlb_mm), %o1 - or %o1, %lo(__cheetah_flush_tlb_mm), %o1 - call tlb_patch_one - mov 19, %o2 - - sethi %hi(__flush_tlb_page), %o0 - or %o0, %lo(__flush_tlb_page), %o0 - sethi %hi(__cheetah_flush_tlb_page), %o1 - or %o1, %lo(__cheetah_flush_tlb_page), %o1 - call tlb_patch_one - mov 22, %o2 - - sethi %hi(__flush_tlb_pending), %o0 - or %o0, %lo(__flush_tlb_pending), %o0 - sethi 
%hi(__cheetah_flush_tlb_pending), %o1 - or %o1, %lo(__cheetah_flush_tlb_pending), %o1 - call tlb_patch_one - mov 27, %o2 - -#ifdef DCACHE_ALIASING_POSSIBLE - sethi %hi(__flush_dcache_page), %o0 - or %o0, %lo(__flush_dcache_page), %o0 - sethi %hi(__cheetah_flush_dcache_page), %o1 - or %o1, %lo(__cheetah_flush_dcache_page), %o1 - call tlb_patch_one - mov 11, %o2 -#endif /* DCACHE_ALIASING_POSSIBLE */ - - ret - restore - #ifdef CONFIG_SMP /* These are all called by the slaves of a cross call, at * trap level 1, with interrupts fully disabled. @@ -447,7 +527,7 @@ cheetah_patch_cachetlbops: */ .align 32 .globl xcall_flush_tlb_mm -xcall_flush_tlb_mm: /* 21 insns */ +xcall_flush_tlb_mm: /* 24 insns */ mov PRIMARY_CONTEXT, %g2 ldxa [%g2] ASI_DMMU, %g3 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */ nop nop nop + nop + nop + nop .globl xcall_flush_tlb_page -xcall_flush_tlb_page: /* 17 insns */ +xcall_flush_tlb_page: /* 20 insns */ /* %g5=context, %g1=vaddr */ mov PRIMARY_CONTEXT, %g4 ldxa [%g4] ASI_DMMU, %g2 @@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */ retry nop nop + nop + nop + nop .globl xcall_flush_tlb_kernel_range -xcall_flush_tlb_kernel_range: /* 25 insns */ +xcall_flush_tlb_kernel_range: /* 44 insns */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 andn %g7, %g2, %g7 sub %g7, %g1, %g3 - add %g2, 1, %g2 + srlx %g3, 18, %g2 + brnz,pn %g2, 2f + add %g2, 1, %g2 sub %g3, %g2, %g3 or %g1, 0x20, %g1 ! 
Nucleus 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP @@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */ brnz,pt %g3, 1b sub %g3, %g2, %g3 retry - nop - nop +2: mov 63 * 8, %g1 +1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2 + andcc %g2, 0x40, %g0 /* _PAGE_L_4U */ + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %g2 + stxa %g0, [%g2] ASI_IMMU + stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS + membar #Sync +2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2 + andcc %g2, 0x40, %g0 + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %g2 + stxa %g0, [%g2] ASI_DMMU + stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS + membar #Sync +2: sub %g1, 8, %g1 + brgez,pt %g1, 1b + nop + retry nop nop nop @@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4: retry +__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 + andn %g7, %g2, %g7 + sub %g7, %g1, %g3 + srlx %g3, 18, %g2 + brnz,pn %g2, 2f + add %g2, 1, %g2 + sub %g3, %g2, %g3 + or %g1, 0x20, %g1 ! Nucleus +1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP + stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %g3, 1b + sub %g3, %g2, %g3 + retry +2: mov 0x80, %g2 + stxa %g0, [%g2] ASI_DMMU_DEMAP + membar #Sync + stxa %g0, [%g2] ASI_IMMU_DEMAP + membar #Sync + retry + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + #ifdef DCACHE_ALIASING_POSSIBLE .align 32 .globl xcall_flush_dcache_page_cheetah @@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error: ba,a,pt %xcc, rtrap .globl __hypervisor_xcall_flush_tlb_mm -__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ +__hypervisor_xcall_flush_tlb_mm: /* 24 insns */ /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ mov %o0, %g2 mov %o1, %g3 @@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ mov HV_FAST_MMU_DEMAP_CTX, %o5 ta HV_FAST_TRAP mov HV_FAST_MMU_DEMAP_CTX, %g6 - brnz,pn %o0, __hypervisor_tlb_xcall_error + brnz,pn %o0, 1f mov %o0, %g5 mov %g2, %o0 mov %g3, %o1 
@@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ mov %g7, %o5 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop .globl __hypervisor_xcall_flush_tlb_page -__hypervisor_xcall_flush_tlb_page: /* 17 insns */ +__hypervisor_xcall_flush_tlb_page: /* 20 insns */ /* %g5=ctx, %g1=vaddr */ mov %o0, %g2 mov %o1, %g3 @@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */ sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP mov HV_MMU_UNMAP_ADDR_TRAP, %g6 - brnz,a,pn %o0, __hypervisor_tlb_xcall_error + brnz,a,pn %o0, 1f mov %o0, %g5 mov %g2, %o0 mov %g3, %o1 mov %g4, %o2 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop .globl __hypervisor_xcall_flush_tlb_kernel_range -__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ +__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */ /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 andn %g7, %g2, %g7 sub %g7, %g1, %g3 + srlx %g3, 18, %g7 add %g2, 1, %g2 sub %g3, %g2, %g3 mov %o0, %g2 mov %o1, %g4 - mov %o2, %g7 + brnz,pn %g7, 2f + mov %o2, %g7 1: add %g1, %g3, %o0 /* ARG0: virtual address */ mov 0, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ ta HV_MMU_UNMAP_ADDR_TRAP mov HV_MMU_UNMAP_ADDR_TRAP, %g6 - brnz,pn %o0, __hypervisor_tlb_xcall_error + brnz,pn %o0, 1f mov %o0, %g5 sethi %hi(PAGE_SIZE), %o2 brnz,pt %g3, 1b sub %g3, %o2, %g3 - mov %g2, %o0 +5: mov %g2, %o0 mov %g4, %o1 mov %g7, %o2 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop +2: mov %o3, %g1 + mov %o5, %g3 + mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov 0, %o2 /* ARG2: mmu context == nucleus */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov 
HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + mov %g1, %o3 + brz,pt %o0, 5b + mov %g3, %o5 + mov HV_FAST_MMU_DEMAP_CTX, %g6 + ba,pt %xcc, 1b + clr %g5 /* These just get rescheduled to PIL vectors. */ .globl xcall_call_function @@ -809,6 +985,58 @@ xcall_kgdb_capture: #endif /* CONFIG_SMP */ + .globl cheetah_patch_cachetlbops +cheetah_patch_cachetlbops: + save %sp, -128, %sp + + sethi %hi(__flush_tlb_mm), %o0 + or %o0, %lo(__flush_tlb_mm), %o0 + sethi %hi(__cheetah_flush_tlb_mm), %o1 + or %o1, %lo(__cheetah_flush_tlb_mm), %o1 + call tlb_patch_one + mov 19, %o2 + + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__cheetah_flush_tlb_page), %o1 + or %o1, %lo(__cheetah_flush_tlb_page), %o1 + call tlb_patch_one + mov 22, %o2 + + sethi %hi(__flush_tlb_pending), %o0 + or %o0, %lo(__flush_tlb_pending), %o0 + sethi %hi(__cheetah_flush_tlb_pending), %o1 + or %o1, %lo(__cheetah_flush_tlb_pending), %o1 + call tlb_patch_one + mov 27, %o2 + + sethi %hi(__flush_tlb_kernel_range), %o0 + or %o0, %lo(__flush_tlb_kernel_range), %o0 + sethi %hi(__cheetah_flush_tlb_kernel_range), %o1 + or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 31, %o2 + +#ifdef DCACHE_ALIASING_POSSIBLE + sethi %hi(__flush_dcache_page), %o0 + or %o0, %lo(__flush_dcache_page), %o0 + sethi %hi(__cheetah_flush_dcache_page), %o1 + or %o1, %lo(__cheetah_flush_dcache_page), %o1 + call tlb_patch_one + mov 11, %o2 +#endif /* DCACHE_ALIASING_POSSIBLE */ + +#ifdef CONFIG_SMP + sethi %hi(xcall_flush_tlb_kernel_range), %o0 + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 + sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1 + or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 44, %o2 +#endif /* CONFIG_SMP */ + + ret + restore .globl hypervisor_patch_cachetlbops hypervisor_patch_cachetlbops: @@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 
call tlb_patch_one - mov 10, %o2 + mov 19, %o2 sethi %hi(__flush_tlb_page), %o0 or %o0, %lo(__flush_tlb_page), %o0 sethi %hi(__hypervisor_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_flush_tlb_page), %o1 call tlb_patch_one - mov 11, %o2 + mov 22, %o2 sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__hypervisor_flush_tlb_pending), %o1 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 call tlb_patch_one - mov 16, %o2 + mov 27, %o2 sethi %hi(__flush_tlb_kernel_range), %o0 or %o0, %lo(__flush_tlb_kernel_range), %o0 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 16, %o2 + mov 31, %o2 #ifdef DCACHE_ALIASING_POSSIBLE sethi %hi(__flush_dcache_page), %o0 @@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 call tlb_patch_one - mov 21, %o2 + mov 24, %o2 sethi %hi(xcall_flush_tlb_page), %o0 or %o0, %lo(xcall_flush_tlb_page), %o0 sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 call tlb_patch_one - mov 17, %o2 + mov 20, %o2 sethi %hi(xcall_flush_tlb_kernel_range), %o0 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 25, %o2 + mov 44, %o2 #endif /* CONFIG_SMP */ ret -- cgit 1.2.3-korg