From 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Tue, 4 Aug 2015 12:17:53 -0700 Subject: Add the rt linux 4.1.3-rt3 as base Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang --- kernel/arch/arc/lib/strcmp.S | 96 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 kernel/arch/arc/lib/strcmp.S (limited to 'kernel/arch/arc/lib/strcmp.S') diff --git a/kernel/arch/arc/lib/strcmp.S b/kernel/arch/arc/lib/strcmp.S new file mode 100644 index 000000000..3544600fe --- /dev/null +++ b/kernel/arch/arc/lib/strcmp.S @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* This is optimized primarily for the ARC700. + It would be possible to speed up the loops by one cycle / word + respective one cycle / byte by forcing double source 1 alignment, unrolling + by a factor of two, and speculatively loading the second word / byte of + source 1; however, that would increase the overhead for loop setup / finish, + and strcmp might often terminate early. */ + +#include + +ENTRY(strcmp) + or r2,r0,r1 + bmsk_s r2,r2,1 + brne r2,0,.Lcharloop + mov_s r12,0x01010101 + ror r5,r12 +.Lwordloop: + ld.ab r2,[r0,4] + ld.ab r3,[r1,4] + nop_s + sub r4,r2,r12 + bic r4,r4,r2 + and r4,r4,r5 + brne r4,0,.Lfound0 + breq r2,r3,.Lwordloop +#ifdef __LITTLE_ENDIAN__ + xor r0,r2,r3 ; mask for difference + sub_s r1,r0,1 + bic_s r0,r0,r1 ; mask for least significant difference bit + sub r1,r5,r0 + xor r0,r5,r1 ; mask for least significant difference byte + and_s r2,r2,r0 + and_s r3,r3,r0 +#endif /* LITTLE ENDIAN */ + cmp_s r2,r3 + mov_s r0,1 + j_s.d [blink] + bset.lo r0,r0,31 + + .balign 4 +#ifdef __LITTLE_ENDIAN__ +.Lfound0: + xor r0,r2,r3 ; mask for difference + or r0,r0,r4 ; or in zero indicator + sub_s r1,r0,1 + bic_s r0,r0,r1 ; mask for least significant difference bit + sub r1,r5,r0 + xor r0,r5,r1 ; mask for least significant difference byte + and_s r2,r2,r0 + and_s r3,r3,r0 + sub.f r0,r2,r3 + mov.hi r0,1 + j_s.d [blink] + bset.lo r0,r0,31 +#else /* BIG ENDIAN */ + /* The zero-detection above can mis-detect 0x01 bytes as zeroes + because of carry-propagateion from a lower significant zero byte. + We can compensate for this by checking that bit0 is zero. + This compensation is not necessary in the step where we + get a low estimate for r2, because in any affected bytes + we already have 0x00 or 0x01, which will remain unchanged + when bit 7 is cleared. */ + .balign 4 +.Lfound0: + lsr r0,r4,8 + lsr_s r1,r2 + bic_s r2,r2,r0 ; get low estimate for r2 and get ... + bic_s r0,r0,r1 ; + or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ... + cmp_s r3,r2 ; ... be independent of trailing garbage + or_s r2,r2,r0 ; likewise for r3 > r2 + bic_s r3,r3,r0 + rlc r0,0 ; r0 := r2 > r3 ? 1 : 0 + cmp_s r2,r3 + j_s.d [blink] + bset.lo r0,r0,31 +#endif /* ENDIAN */ + + .balign 4 +.Lcharloop: + ldb.ab r2,[r0,1] + ldb.ab r3,[r1,1] + nop_s + breq r2,0,.Lcmpend + breq r2,r3,.Lcharloop +.Lcmpend: + j_s.d [blink] + sub r0,r2,r3 +END(strcmp) -- cgit 1.2.3-korg