diff options
Diffstat (limited to 'qemu/pixman/pixman/pixman-mips-memcpy-asm.S')
-rw-r--r-- | qemu/pixman/pixman/pixman-mips-memcpy-asm.S | 382 |
1 files changed, 0 insertions, 382 deletions
diff --git a/qemu/pixman/pixman/pixman-mips-memcpy-asm.S b/qemu/pixman/pixman/pixman-mips-memcpy-asm.S deleted file mode 100644 index 9ad6da537..000000000 --- a/qemu/pixman/pixman/pixman-mips-memcpy-asm.S +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Copyright (c) 2012 - * MIPS Technologies, Inc., California. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "pixman-mips-dspr2-asm.h" - -/* - * This routine could be optimized for MIPS64. The current code only - * uses MIPS32 instructions. 
- */

/*
 * Unaligned-word access helpers.
 *
 * LWHI/SWHI access the bytes from the given address up to the end of the
 * enclosing aligned word; LWLO supplies the remaining bytes of an unaligned
 * word.  Which of lwl/lwr (swl/swr) does which depends on the endianness.
 */
#ifdef EB
#  define LWHI  lwl     /* high part is left in big-endian      */
#  define SWHI  swl     /* high part is left in big-endian      */
#  define LWLO  lwr     /* low part is right in big-endian      */
#  define SWLO  swr     /* low part is right in big-endian      */
#else
#  define LWHI  lwr     /* high part is right in little-endian  */
#  define SWHI  swr     /* high part is right in little-endian  */
#  define LWLO  lwl     /* low part is left in little-endian    */
#  define SWLO  swl     /* low part is left in little-endian    */
#endif

/*
 * pixman_mips_fast_memcpy
 *
 * In:   a0 = dst, a1 = src, a2 = byte count
 * Out:  v0 = original dst
 * Uses: AT, v1, a3, t0-t9 as scratch
 *
 * Strategy: copies shorter than 8 bytes go straight to the byte loop.
 * Otherwise dst is brought to a word boundary, then the bulk is moved in
 * 64-byte chunks, one optional 32-byte chunk, whole words, and finally
 * single bytes.  If src and dst share the same alignment within a word,
 * plain lw is used for the loads; otherwise every word is loaded with an
 * LWHI/LWLO pair.  Stores to the (aligned) dst always use sw.
 *
 * NOTE(review): the code is laid out with explicit branch delay slots (the
 * instruction following each branch is executed whether or not the branch
 * is taken; such instructions are indented by one extra space below).
 * This presumably relies on LEAF_MIPS32R2 selecting ".set noreorder" --
 * confirm in pixman-mips-dspr2-asm.h.
 */
LEAF_MIPS32R2(pixman_mips_fast_memcpy)

    slti    AT, a2, 8
    bne     AT, zero, $last8        /* fewer than 8 bytes: byte loop only */
     move   v0, a0                  /* memcpy returns the dst pointer */

/* Test if the src and dst are word-aligned, or can be made word-aligned */
    xor     t8, a1, a0
    andi    t8, t8, 0x3             /* t8 is a0/a1 word-displacement */

    bne     t8, zero, $unaligned
     negu   a3, a0

    andi    a3, a3, 0x3             /* we need to copy a3 bytes to make a0/a1 aligned */
    beq     a3, zero, $chk16w       /* when a3=0 then the dst (a0) is word-aligned */
     subu   a2, a2, a3              /* now a2 is the remaining byte count */

    LWHI    t8, 0(a1)               /* copy the a3 bytes up to the word boundary */
    addu    a1, a1, a3
    SWHI    t8, 0(a0)
    addu    a0, a0, a3

/* Now the dst/src are mutually word-aligned with word-aligned addresses */
$chk16w:
    andi    t8, a2, 0x3f            /* any whole 64-byte chunks? */
                                    /* t8 is the byte count after 64-byte chunks */

    beq     a2, t8, $chk8w          /* if a2==t8, no 64-byte chunks */
                                    /* There will be at most 1 32-byte chunk after it */
     subu   a3, a2, t8              /* subtract the remainder from a2 */
                                    /* Here a3 counts bytes in 16w chunks */
    addu    a3, a0, a3              /* Now a3 is the final dst after 64-byte chunks */

    addu    t0, a0, a2              /* t0 is the "past the end" address */

/*
 * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past
 * the "t0-32" address.
 * This means: for x=128 the last "safe" a0 address is "t0-160".
 * Alternatively, for x=64 the last "safe" a0 address is "t0-96".
 * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit.
 */
    subu    t9, t0, 160             /* t9 is the "last safe pref 30, 128(a0)" address */

    pref    0, 0(a1)                /* bring the first line of src, addr 0 */
    pref    0, 32(a1)               /* bring the second line of src, addr 32 */
    pref    0, 64(a1)               /* bring the third line of src, addr 64 */
    pref    30, 32(a0)              /* safe, as we have at least 64 bytes ahead */
/* In case the a0 > t9 don't use "pref 30" at all */
    sgtu    v1, a0, t9              /* v1 = 1 while "pref 30" must be skipped */
    bgtz    v1, $loop16w            /* skip "pref 30, 64(a0)" for too short arrays */
     nop
/* otherwise, start with using pref30 */
    pref    30, 64(a0)

$loop16w:
    pref    0, 96(a1)
    lw      t0, 0(a1)
    bgtz    v1, $skip_pref30_96     /* skip "pref 30, 96(a0)" */
     lw     t1, 4(a1)
    pref    30, 96(a0)              /* continue setting up the dest, addr 96 */
$skip_pref30_96:
    lw      t2, 8(a1)
    lw      t3, 12(a1)
    lw      t4, 16(a1)
    lw      t5, 20(a1)
    lw      t6, 24(a1)
    lw      t7, 28(a1)
    pref    0, 128(a1)              /* bring the next lines of src, addr 128 */

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)

    lw      t0, 32(a1)
    bgtz    v1, $skip_pref30_128    /* skip "pref 30, 128(a0)" */
     lw     t1, 36(a1)
    pref    30, 128(a0)             /* continue setting up the dest, addr 128 */
$skip_pref30_128:
    lw      t2, 40(a1)
    lw      t3, 44(a1)
    lw      t4, 48(a1)
    lw      t5, 52(a1)
    lw      t6, 56(a1)
    lw      t7, 60(a1)
    pref    0, 160(a1)              /* bring the next lines of src, addr 160 */

    sw      t0, 32(a0)
    sw      t1, 36(a0)
    sw      t2, 40(a0)
    sw      t3, 44(a0)
    sw      t4, 48(a0)
    sw      t5, 52(a0)
    sw      t6, 56(a0)
    sw      t7, 60(a0)

    addiu   a0, a0, 64              /* adding 64 to dest */
    sgtu    v1, a0, t9              /* re-evaluate the "pref 30" guard */
    bne     a0, a3, $loop16w
     addiu  a1, a1, 64              /* adding 64 to src */
    move    a2, t8                  /* a2 = bytes left after the 64-byte chunks */

/* Here we have src and dest word-aligned but less than 64-bytes to go */

$chk8w:
    pref    0, 0x0(a1)
    andi    t8, a2, 0x1f            /* is there a 32-byte chunk? */
                                    /* t8 is the remainder count past 32 bytes */
    beq     a2, t8, $chk1w          /* when a2=t8, no 32-byte chunk */
     nop

    lw      t0, 0(a1)
    lw      t1, 4(a1)
    lw      t2, 8(a1)
    lw      t3, 12(a1)
    lw      t4, 16(a1)
    lw      t5, 20(a1)
    lw      t6, 24(a1)
    lw      t7, 28(a1)
    addiu   a1, a1, 32

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)
    addiu   a0, a0, 32

$chk1w:
    andi    a2, t8, 0x3             /* now a2 is the remainder past 1w chunks */
    beq     a2, t8, $last8          /* a2==t8: no whole word left */
     subu   a3, t8, a2              /* a3 is count of bytes in 1w chunks */
    addu    a3, a0, a3              /* now a3 is the dst address past the 1w chunks */

/* copying in words (4-byte chunks) */
$wordCopy_loop:
    lw      t3, 0(a1)               /* the first t3 may be equal t0 ... optimize? */
    addiu   a1, a1, 4
    addiu   a0, a0, 4
    bne     a0, a3, $wordCopy_loop
     sw     t3, -4(a0)              /* a0 was already advanced, hence -4 */

/* For the last (<8) bytes */
$last8:
    blez    a2, $leave
     addu   a3, a0, a2              /* a3 is the dst address past the last byte */
$last8loop:
    lb      v1, 0(a1)
    addiu   a1, a1, 1
    addiu   a0, a0, 1
    bne     a0, a3, $last8loop
     sb     v1, -1(a0)              /* a0 was already advanced, hence -1 */

$leave:
    j       ra
     nop

/*
 * UNALIGNED case: src and dst have different alignment within a word
 */

$unaligned:
    /* got here with a3="negu a0" */
    andi    a3, a3, 0x3             /* test if the a0 is word aligned */
    beqz    a3, $ua_chk16w
     subu   a2, a2, a3              /* bytes left after initial a3 bytes */

    LWHI    v1, 0(a1)
    LWLO    v1, 3(a1)
    addu    a1, a1, a3              /* a3 may be here 1, 2 or 3 */
    SWHI    v1, 0(a0)
    addu    a0, a0, a3              /* below the dst will be word aligned (NOTE1) */

$ua_chk16w:
    andi    t8, a2, 0x3f            /* any whole 64-byte chunks? */
                                    /* t8 is the byte count after 64-byte chunks */
    beq     a2, t8, $ua_chk8w       /* if a2==t8, no 64-byte chunks */
                                    /* There will be at most 1 32-byte chunk after it */
     subu   a3, a2, t8              /* subtract the remainder from a2 */
                                    /* Here a3 counts bytes in 16w chunks */
    addu    a3, a0, a3              /* Now a3 is the final dst after 64-byte chunks */

    addu    t0, a0, a2              /* t0 is the "past the end" address */

    subu    t9, t0, 160             /* t9 is the "last safe pref 30, 128(a0)" address */

    pref    0, 0(a1)                /* bring the first line of src, addr 0 */
    pref    0, 32(a1)               /* bring the second line of src, addr 32 */
    pref    0, 64(a1)               /* bring the third line of src, addr 64 */
    pref    30, 32(a0)              /* safe, as we have at least 64 bytes ahead */
/* In case the a0 > t9 don't use "pref 30" at all */
    sgtu    v1, a0, t9              /* v1 = 1 while "pref 30" must be skipped */
    bgtz    v1, $ua_loop16w         /* skip "pref 30, 64(a0)" for too short arrays */
     nop
/* otherwise, start with using pref30 */
    pref    30, 64(a0)

$ua_loop16w:
    pref    0, 96(a1)
    LWHI    t0, 0(a1)
    LWLO    t0, 3(a1)
    LWHI    t1, 4(a1)
    bgtz    v1, $ua_skip_pref30_96  /* skip "pref 30, 96(a0)" */
     LWLO   t1, 7(a1)
    pref    30, 96(a0)              /* continue setting up the dest, addr 96 */
$ua_skip_pref30_96:
    LWHI    t2, 8(a1)
    LWLO    t2, 11(a1)
    LWHI    t3, 12(a1)
    LWLO    t3, 15(a1)
    LWHI    t4, 16(a1)
    LWLO    t4, 19(a1)
    LWHI    t5, 20(a1)
    LWLO    t5, 23(a1)
    LWHI    t6, 24(a1)
    LWLO    t6, 27(a1)
    LWHI    t7, 28(a1)
    LWLO    t7, 31(a1)
    pref    0, 128(a1)              /* bring the next lines of src, addr 128 */

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)

    LWHI    t0, 32(a1)
    LWLO    t0, 35(a1)
    LWHI    t1, 36(a1)
    bgtz    v1, $ua_skip_pref30_128 /* skip "pref 30, 128(a0)" */
     LWLO   t1, 39(a1)
    pref    30, 128(a0)             /* continue setting up the dest, addr 128 */
$ua_skip_pref30_128:
    LWHI    t2, 40(a1)
    LWLO    t2, 43(a1)
    LWHI    t3, 44(a1)
    LWLO    t3, 47(a1)
    LWHI    t4, 48(a1)
    LWLO    t4, 51(a1)
    LWHI    t5, 52(a1)
    LWLO    t5, 55(a1)
    LWHI    t6, 56(a1)
    LWLO    t6, 59(a1)
    LWHI    t7, 60(a1)
    LWLO    t7, 63(a1)
    pref    0, 160(a1)              /* bring the next lines of src, addr 160 */

    sw      t0, 32(a0)
    sw      t1, 36(a0)
    sw      t2, 40(a0)
    sw      t3, 44(a0)
    sw      t4, 48(a0)
    sw      t5, 52(a0)
    sw      t6, 56(a0)
    sw      t7, 60(a0)

    addiu   a0, a0, 64              /* adding 64 to dest */
    sgtu    v1, a0, t9              /* re-evaluate the "pref 30" guard */
    bne     a0, a3, $ua_loop16w
     addiu  a1, a1, 64              /* adding 64 to src */
    move    a2, t8                  /* a2 = bytes left after the 64-byte chunks */

/* Here dst is word-aligned (src is not) and less than 64 bytes are left */

$ua_chk8w:
    pref    0, 0x0(a1)
    andi    t8, a2, 0x1f            /* is there a 32-byte chunk? */
                                    /* t8 is the remainder count */
    beq     a2, t8, $ua_chk1w       /* when a2=t8, no 32-byte chunk */
     LWHI   t0, 0(a1)               /* delay slot: also runs when the branch is
                                       taken; t0 is not read on that path */
    LWLO    t0, 3(a1)
    LWHI    t1, 4(a1)
    LWLO    t1, 7(a1)
    LWHI    t2, 8(a1)
    LWLO    t2, 11(a1)
    LWHI    t3, 12(a1)
    LWLO    t3, 15(a1)
    LWHI    t4, 16(a1)
    LWLO    t4, 19(a1)
    LWHI    t5, 20(a1)
    LWLO    t5, 23(a1)
    LWHI    t6, 24(a1)
    LWLO    t6, 27(a1)
    LWHI    t7, 28(a1)
    LWLO    t7, 31(a1)
    addiu   a1, a1, 32

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)
    addiu   a0, a0, 32

$ua_chk1w:
    andi    a2, t8, 0x3             /* now a2 is the remainder past 1w chunks */
    beq     a2, t8, $ua_smallCopy   /* a2==t8: no whole word left */
     subu   a3, t8, a2              /* a3 is count of bytes in 1w chunks */
    addu    a3, a0, a3              /* now a3 is the dst address past the 1w chunks */

/* copying in words (4-byte chunks) */
$ua_wordCopy_loop:
    LWHI    v1, 0(a1)
    LWLO    v1, 3(a1)
    addiu   a1, a1, 4
    addiu   a0, a0, 4               /* note: dst=a0 is word aligned here, see NOTE1 */
    bne     a0, a3, $ua_wordCopy_loop
     sw     v1, -4(a0)              /* a0 was already advanced, hence -4 */

/* Now less than 4 bytes (value in a2) left to copy */
$ua_smallCopy:
    beqz    a2, $leave
     addu   a3, a0, a2              /* a3 is the dst address past the last byte */
$ua_smallCopy_loop:
    lb      v1, 0(a1)
    addiu   a1, a1, 1
    addiu   a0, a0, 1
    bne     a0, a3, $ua_smallCopy_loop
     sb     v1, -1(a0)              /* a0 was already advanced, hence -1 */

    j       ra
     nop

END(pixman_mips_fast_memcpy)