summaryrefslogtreecommitdiffstats
path: root/kernel/arch/sh/lib64/copy_page.S
diff options
context:
space:
mode:
authorYunhong Jiang <yunhong.jiang@intel.com>2015-08-04 12:17:53 -0700
committerYunhong Jiang <yunhong.jiang@intel.com>2015-08-04 15:44:42 -0700
commit9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (patch)
tree1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/arch/sh/lib64/copy_page.S
parent98260f3884f4a202f9ca5eabed40b1354c489b29 (diff)
Add the rt linux 4.1.3-rt3 as base
Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Diffstat (limited to 'kernel/arch/sh/lib64/copy_page.S')
-rw-r--r--kernel/arch/sh/lib64/copy_page.S89
1 files changed, 89 insertions, 0 deletions
diff --git a/kernel/arch/sh/lib64/copy_page.S b/kernel/arch/sh/lib64/copy_page.S
new file mode 100644
index 000000000..0ec6fca63
--- /dev/null
+++ b/kernel/arch/sh/lib64/copy_page.S
@@ -0,0 +1,89 @@
+/*
+ Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
+
+ This file is subject to the terms and conditions of the GNU General Public
+ License. See the file "COPYING" in the main directory of this archive
+ for more details.
+
+ Tight version of memcpy for the case of just copying a page.
+ Prefetch strategy empirically optimised against RTL simulations
+ of SH5-101 cut2 eval chip with Cayman board DDR memory.
+
+ Parameters:
+ r2 : destination effective address (start of page)
+ r3 : source effective address (start of page)
+
+ Always copies 4096 bytes.
+
+ Points to review.
+ * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
+ It seems like the prefetch needs to be at least 4 lines ahead to get
+ the data into the cache in time, and the allocos contend with outstanding
+ prefetches for the same cache set, so it's better to have the numbers
+ different.
+ */
+
+ .section .text..SHmedia32,"ax" /* section flags "ax": allocatable, executable */
+ .little
+
+ .balign 8
+ .global copy_page
+copy_page:
+
+ /* Copy 4096 bytes worth of data from r3 (source page) to r2 (destination page), 32 bytes per loop pass.
+ Do prefetches 4 lines ahead -- currently compiled out by the #if 0 blocks tagged TAKum03020.
+ Do alloco 2 lines ahead. Writes r2 (ends at destination + 4096), r6-r8, r22, r23, r36-r39, r60-r62, tr0-tr3. */
+
+ pta 1f, tr1 /* tr1 -> label 1, the loop head */
+ pta 2f, tr2 /* tr2 -> label 2; only the disabled prefetch-skip branch uses it */
+ pta 3f, tr3 /* tr3 -> label 3, the copy body (target of the alloco skip) */
+ ptabs r18, tr0 /* tr0 = address held in r18; used by the final blink to return */
+
+#if 0
+ /* TAKum03020 */
+ ld.q r3, 0x00, r63
+ ld.q r3, 0x20, r63
+ ld.q r3, 0x40, r63
+ ld.q r3, 0x60, r63
+#endif
+ alloco r2, 0x00 /* alloc first destination line before entering the loop */
+ synco ! TAKum03020
+ alloco r2, 0x20 /* alloc second destination line; the loop handles the rest, 2 lines ahead */
+ synco ! TAKum03020
+
+ movi 3968, r6 /* 3968 = 4096 - 128 */
+ add r2, r6, r6 /* r6 = destination + 3968; only read by the disabled prefetch branch */
+ addi r6, 64, r7 /* r7 = destination + 4032: from here on there is no line 2 ahead to alloco */
+ addi r7, 64, r8 /* r8 = destination + 4096: end of page, loop limit */
+ sub r3, r2, r60 /* r60 = source - destination, so r2 + r60 addresses the source */
+ addi r60, 8, r61 /* r61 = r60 + 8 */
+ addi r61, 8, r62 /* r62 = r60 + 16 */
+ addi r62, 8, r23 /* r23 = r60 + 24 */
+ addi r60, 0x80, r22 /* r22 = r60 + 128; only read by the disabled prefetch load */
+
+/* Minimal code size. The extra branches inside the loop don't cost much
+ because they overlap with the time spent waiting for prefetches to
+ complete. */
+1: /* loop head: r2 = current destination address */
+#if 0
+ /* TAKum03020 */
+ bge/u r2, r6, tr2 ! skip prefetch for last 4 lines
+ ldx.q r2, r22, r63 ! prefetch 4 lines hence
+#endif
+2: /* reached by fall-through; tr2 is only branched to from the disabled code above */
+ bge/u r2, r7, tr3 ! skip alloco for last 2 lines
+ alloco r2, 0x40 ! alloc destination line 2 lines ahead
+ synco ! TAKum03020
+3: /* copy body: four quadword loads from the source, then four quadword stores */
+ ldx.q r2, r60, r36 /* r36 = source quadword at offset 0 (address r2 + r60) */
+ ldx.q r2, r61, r37 /* r37 = source quadword at offset 8 */
+ ldx.q r2, r62, r38 /* r38 = source quadword at offset 16 */
+ ldx.q r2, r23, r39 /* r39 = source quadword at offset 24 */
+ st.q r2, 0, r36 /* store the four quadwords to destination offsets 0, 8, 16, 24 */
+ st.q r2, 8, r37
+ st.q r2, 16, r38
+ st.q r2, 24, r39
+ addi r2, 32, r2 /* advance the destination pointer by the 32 bytes just copied */
+ bgt/l r8, r2, tr1 /* loop back to label 1 while r2 is still below the end of page (r8) */
+
+ blink tr0, r63 ! return