summaryrefslogtreecommitdiffstats
path: root/kernel/arch/sparc/lib/NG4memcpy.S
diff options
context:
space:
mode:
authorYunhong Jiang <yunhong.jiang@linux.intel.com>2017-03-08 23:13:28 -0800
committerYunhong Jiang <yunhong.jiang@linux.intel.com>2017-03-08 23:36:15 -0800
commit52f993b8e89487ec9ee15a7fb4979e0f09a45b27 (patch)
treed65304486afe0bea4a311c783c0d72791c8c0aa2 /kernel/arch/sparc/lib/NG4memcpy.S
parentc189ccac5702322ed843fe17057035b7222a59b6 (diff)
Upgrade to 4.4.50-rt62
The current kernel is based on rt kernel v4.4.6-rt14. We will upgrade it to 4.4.50-rt62. The command to achieve it is: a) Clone a git repo from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git b) Get the diff between this two changesets: git diff 640eca2901f3435e616157b11379d3223a44b391 705619beeea1b0b48219a683fd1a901a86fdaf5e where the two commits are: [yjiang5@jnakajim-build linux-stable-rt]$ git show --oneline --name-only 640eca2901f3435e616157b11379d3223a44b391 640eca2901f3 v4.4.6-rt14 localversion-rt [yjiang5@jnakajim-build linux-stable-rt]$ git show --oneline --name-only 705619beeea1b0b48219a683fd1a901a86fdaf5e 705619beeea1 Linux 4.4.50-rt62 localversion-rt c) One patch has been backported thus revert the patch before applying. filterdiff -p1 -x scripts/package/Makefile ~/tmp/v4.4.6-rt14-4.4.50-rt62.diff |patch -p1 --dry-run Upstream status: backport Change-Id: I244d57a32f6066e5a5b9915f9fbf99e7bbca6e01 Signed-off-by: Yunhong Jiang <yunhong.jiang@linux.intel.com>
Diffstat (limited to 'kernel/arch/sparc/lib/NG4memcpy.S')
-rw-r--r--kernel/arch/sparc/lib/NG4memcpy.S294
1 files changed, 223 insertions, 71 deletions
diff --git a/kernel/arch/sparc/lib/NG4memcpy.S b/kernel/arch/sparc/lib/NG4memcpy.S
index 8e13ee1f4..75bb93b14 100644
--- a/kernel/arch/sparc/lib/NG4memcpy.S
+++ b/kernel/arch/sparc/lib/NG4memcpy.S
@@ -4,6 +4,7 @@
*/
#ifdef __KERNEL__
+#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#define GLOBAL_SPARE %g7
@@ -46,22 +47,19 @@
#endif
#ifndef EX_LD
-#define EX_LD(x) x
+#define EX_LD(x,y) x
#endif
#ifndef EX_LD_FP
-#define EX_LD_FP(x) x
+#define EX_LD_FP(x,y) x
#endif
#ifndef EX_ST
-#define EX_ST(x) x
+#define EX_ST(x,y) x
#endif
#ifndef EX_ST_FP
-#define EX_ST_FP(x) x
+#define EX_ST_FP(x,y) x
#endif
-#ifndef EX_RETVAL
-#define EX_RETVAL(x) x
-#endif
#ifndef LOAD
#define LOAD(type,addr,dest) type [addr], dest
@@ -94,6 +92,158 @@
.register %g3,#scratch
.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+__restore_asi_fp:
+ VISExitHalf
+__restore_asi:
+ retl
+ wr %g0, ASI_AIUS, %asi
+
+ENTRY(NG4_retl_o2)
+ ba,pt %xcc, __restore_asi
+ mov %o2, %o0
+ENDPROC(NG4_retl_o2)
+ENTRY(NG4_retl_o2_plus_1)
+ ba,pt %xcc, __restore_asi
+ add %o2, 1, %o0
+ENDPROC(NG4_retl_o2_plus_1)
+ENTRY(NG4_retl_o2_plus_4)
+ ba,pt %xcc, __restore_asi
+ add %o2, 4, %o0
+ENDPROC(NG4_retl_o2_plus_4)
+ENTRY(NG4_retl_o2_plus_o5)
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5)
+ENTRY(NG4_retl_o2_plus_o5_plus_4)
+ add %o5, 4, %o5
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_4)
+ENTRY(NG4_retl_o2_plus_o5_plus_8)
+ add %o5, 8, %o5
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_8)
+ENTRY(NG4_retl_o2_plus_o5_plus_16)
+ add %o5, 16, %o5
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_16)
+ENTRY(NG4_retl_o2_plus_o5_plus_24)
+ add %o5, 24, %o5
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_24)
+ENTRY(NG4_retl_o2_plus_o5_plus_32)
+ add %o5, 32, %o5
+ ba,pt %xcc, __restore_asi
+ add %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_32)
+ENTRY(NG4_retl_o2_plus_g1)
+ ba,pt %xcc, __restore_asi
+ add %o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1)
+ENTRY(NG4_retl_o2_plus_g1_plus_1)
+ add %g1, 1, %g1
+ ba,pt %xcc, __restore_asi
+ add %o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1_plus_1)
+ENTRY(NG4_retl_o2_plus_g1_plus_8)
+ add %g1, 8, %g1
+ ba,pt %xcc, __restore_asi
+ add %o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1_plus_8)
+ENTRY(NG4_retl_o2_plus_o4)
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4)
+ENTRY(NG4_retl_o2_plus_o4_plus_8)
+ add %o4, 8, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_8)
+ENTRY(NG4_retl_o2_plus_o4_plus_16)
+ add %o4, 16, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_16)
+ENTRY(NG4_retl_o2_plus_o4_plus_24)
+ add %o4, 24, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_24)
+ENTRY(NG4_retl_o2_plus_o4_plus_32)
+ add %o4, 32, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_32)
+ENTRY(NG4_retl_o2_plus_o4_plus_40)
+ add %o4, 40, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_40)
+ENTRY(NG4_retl_o2_plus_o4_plus_48)
+ add %o4, 48, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_48)
+ENTRY(NG4_retl_o2_plus_o4_plus_56)
+ add %o4, 56, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_56)
+ENTRY(NG4_retl_o2_plus_o4_plus_64)
+ add %o4, 64, %o4
+ ba,pt %xcc, __restore_asi
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_64)
+ENTRY(NG4_retl_o2_plus_o4_fp)
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
+ add %o4, 8, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
+ add %o4, 16, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
+ add %o4, 24, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
+ add %o4, 32, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
+ add %o4, 40, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
+ add %o4, 48, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
+ add %o4, 56, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
+ add %o4, 64, %o4
+ ba,pt %xcc, __restore_asi_fp
+ add %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
+#endif
.align 64
.globl FUNC_NAME
@@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
brz,pt %g1, 51f
sub %o2, %g1, %o2
-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+
+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
add %o1, 1, %o1
subcc %g1, 1, %g1
add %o0, 1, %o0
bne,pt %icc, 1b
- EX_ST(STORE(stb, %g2, %o0 - 0x01))
+ EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
@@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
brz,pt %g1, .Llarge_aligned
sub %o2, %g1, %o2
-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
add %o1, 8, %o1
subcc %g1, 8, %g1
add %o0, 8, %o0
bne,pt %icc, 1b
- EX_ST(STORE(stx, %g2, %o0 - 0x08))
+ EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
.Llarge_aligned:
/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
andn %o2, 0x3f, %o4
sub %o2, %o4, %o2
-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
add %o1, 0x40, %o1
- EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
+ EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
subcc %o4, 0x40, %o4
- EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
- EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
- EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
- EX_ST(STORE_INIT(%g1, %o0))
+ EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
+ EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
add %o0, 0x08, %o0
- EX_ST(STORE_INIT(%g2, %o0))
+ EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
add %o0, 0x08, %o0
- EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
- EX_ST(STORE_INIT(%g3, %o0))
+ EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
+ EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
add %o0, 0x08, %o0
- EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
- EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+ EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
+ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
add %o0, 0x08, %o0
- EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
- EX_ST(STORE_INIT(%o5, %o0))
+ EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
+ EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
add %o0, 0x08, %o0
- EX_ST(STORE_INIT(%g2, %o0))
+ EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
add %o0, 0x08, %o0
- EX_ST(STORE_INIT(%g3, %o0))
+ EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
add %o0, 0x08, %o0
- EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+ EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
add %o0, 0x08, %o0
bne,pt %icc, 1b
LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
@@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o2, %o4, %o2
alignaddr %o1, %g0, %g1
add %o1, %o4, %o1
- EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
-1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
+1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
subcc %o4, 0x40, %o4
- EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
- EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
- EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
- EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
- EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
- EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
+ EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
faligndata %f0, %f2, %f16
- EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
faligndata %f2, %f4, %f18
add %g1, 0x40, %g1
faligndata %f4, %f6, %f20
@@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
faligndata %f10, %f12, %f26
faligndata %f12, %f14, %f28
faligndata %f14, %f0, %f30
- EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
- EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
- EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
- EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
- EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
- EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
- EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
- EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
+ EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
+ EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
+ EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
+ EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
+ EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
+ EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
+ EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
+ EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
add %o0, 0x40, %o0
bne,pt %icc, 1b
LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
@@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andncc %o2, 0x20 - 1, %o5
be,pn %icc, 2f
sub %o2, %o5, %o2
-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
- EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
- EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
- EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
+ EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
+ EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
+ EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
add %o1, 0x20, %o1
subcc %o5, 0x20, %o5
- EX_ST(STORE(stx, %g1, %o0 + 0x00))
- EX_ST(STORE(stx, %g2, %o0 + 0x08))
- EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
- EX_ST(STORE(stx, %o4, %o0 + 0x18))
+ EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
+ EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
+ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24)
+ EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
bne,pt %icc, 1b
add %o0, 0x20, %o0
2: andcc %o2, 0x18, %o5
be,pt %icc, 3f
sub %o2, %o5, %o2
-1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+
+1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
add %o1, 0x08, %o1
add %o0, 0x08, %o0
subcc %o5, 0x08, %o5
bne,pt %icc, 1b
- EX_ST(STORE(stx, %g1, %o0 - 0x08))
+ EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
3: brz,pt %o2, .Lexit
cmp %o2, 0x04
bl,pn %icc, .Ltiny
nop
- EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+ EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
add %o1, 0x04, %o1
add %o0, 0x04, %o0
subcc %o2, 0x04, %o2
bne,pn %icc, .Ltiny
- EX_ST(STORE(stw, %g1, %o0 - 0x04))
+ EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
ba,a,pt %icc, .Lexit
.Lmedium_unaligned:
/* First get dest 8 byte aligned. */
@@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
brz,pt %g1, 2f
sub %o2, %g1, %o2
-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
add %o1, 1, %o1
subcc %g1, 1, %g1
add %o0, 1, %o0
bne,pt %icc, 1b
- EX_ST(STORE(stb, %g2, %o0 - 0x01))
+ EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
2:
and %o1, 0x7, %g1
brz,pn %g1, .Lmedium_noprefetch
@@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
mov 64, %g2
sub %g2, %g1, %g2
andn %o1, 0x7, %o1
- EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
+ EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
sllx %o4, %g1, %o4
andn %o2, 0x08 - 1, %o5
sub %o2, %o5, %o2
-1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
+1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
add %o1, 0x08, %o1
subcc %o5, 0x08, %o5
srlx %g3, %g2, GLOBAL_SPARE
or GLOBAL_SPARE, %o4, GLOBAL_SPARE
- EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
+ EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
add %o0, 0x08, %o0
bne,pt %icc, 1b
sllx %g3, %g1, %o4
@@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %icc, .Lsmall_unaligned
.Ltiny:
- EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+ EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
subcc %o2, 1, %o2
be,pn %icc, .Lexit
- EX_ST(STORE(stb, %g1, %o0 + 0x00))
- EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
+ EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
+ EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
subcc %o2, 1, %o2
be,pn %icc, .Lexit
- EX_ST(STORE(stb, %g1, %o0 + 0x01))
- EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
+ EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
+ EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
ba,pt %icc, .Lexit
- EX_ST(STORE(stb, %g1, %o0 + 0x02))
+ EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
.Lsmall:
andcc %g2, 0x3, %g0
@@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
andn %o2, 0x4 - 1, %o5
sub %o2, %o5, %o2
1:
- EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+ EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
add %o1, 0x04, %o1
subcc %o5, 0x04, %o5
add %o0, 0x04, %o0
bne,pt %icc, 1b
- EX_ST(STORE(stw, %g1, %o0 - 0x04))
+ EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
brz,pt %o2, .Lexit
nop
ba,a,pt %icc, .Ltiny
.Lsmall_unaligned:
-1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
add %o1, 1, %o1
add %o0, 1, %o0
subcc %o2, 1, %o2
bne,pt %icc, 1b
- EX_ST(STORE(stb, %g1, %o0 - 0x01))
+ EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
ba,a,pt %icc, .Lexit
.size FUNC_NAME, .-FUNC_NAME