From 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Tue, 4 Aug 2015 12:17:53 -0700 Subject: Add the rt linux 4.1.3-rt3 as base Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang --- kernel/arch/arm/lib/Makefile | 41 +++ kernel/arch/arm/lib/ashldi3.S | 54 ++++ kernel/arch/arm/lib/ashrdi3.S | 54 ++++ kernel/arch/arm/lib/backtrace.S | 152 +++++++++++ kernel/arch/arm/lib/bitops.h | 106 ++++++++ kernel/arch/arm/lib/bswapsdi2.S | 37 +++ kernel/arch/arm/lib/call_with_stack.S | 44 ++++ kernel/arch/arm/lib/changebit.S | 15 ++ kernel/arch/arm/lib/clear_user.S | 54 ++++ kernel/arch/arm/lib/clearbit.S | 15 ++ kernel/arch/arm/lib/copy_from_user.S | 109 ++++++++ kernel/arch/arm/lib/copy_page.S | 47 ++++ kernel/arch/arm/lib/copy_template.S | 297 ++++++++++++++++++++++ kernel/arch/arm/lib/copy_to_user.S | 111 ++++++++ kernel/arch/arm/lib/csumipv6.S | 33 +++ kernel/arch/arm/lib/csumpartial.S | 142 +++++++++++ kernel/arch/arm/lib/csumpartialcopy.S | 53 ++++ kernel/arch/arm/lib/csumpartialcopygeneric.S | 333 ++++++++++++++++++++++++ kernel/arch/arm/lib/csumpartialcopyuser.S | 83 ++++++ kernel/arch/arm/lib/delay-loop.S | 68 +++++ kernel/arch/arm/lib/delay.c | 117 +++++++++ kernel/arch/arm/lib/div64.S | 212 ++++++++++++++++ kernel/arch/arm/lib/ecard.S | 44 ++++ kernel/arch/arm/lib/findbit.S | 196 +++++++++++++++ kernel/arch/arm/lib/floppydma.S | 32 +++ kernel/arch/arm/lib/getuser.S | 149 +++++++++++ kernel/arch/arm/lib/io-acorn.S | 32 +++ kernel/arch/arm/lib/io-readsb.S | 123 +++++++++ kernel/arch/arm/lib/io-readsl.S | 79 ++++++ kernel/arch/arm/lib/io-readsw-armv3.S | 106 ++++++++ kernel/arch/arm/lib/io-readsw-armv4.S | 131 ++++++++++ kernel/arch/arm/lib/io-writesb.S | 94 +++++++ kernel/arch/arm/lib/io-writesl.S | 67 +++++ kernel/arch/arm/lib/io-writesw-armv3.S | 126 ++++++++++ kernel/arch/arm/lib/io-writesw-armv4.S | 100 ++++++++ kernel/arch/arm/lib/lib1funcs.S | 363 +++++++++++++++++++++++++++ kernel/arch/arm/lib/lshrdi3.S | 54 ++++ kernel/arch/arm/lib/memchr.S | 26 ++ kernel/arch/arm/lib/memcpy.S | 68 +++++ kernel/arch/arm/lib/memmove.S | 227 +++++++++++++++++ kernel/arch/arm/lib/memset.S | 135 ++++++++++ kernel/arch/arm/lib/memzero.S | 137 ++++++++++ kernel/arch/arm/lib/muldi3.S | 48 ++++ kernel/arch/arm/lib/putuser.S | 98 ++++++++ kernel/arch/arm/lib/setbit.S | 15 ++ kernel/arch/arm/lib/strchr.S | 27 ++ kernel/arch/arm/lib/strrchr.S | 26 ++ kernel/arch/arm/lib/testchangebit.S | 15 ++ kernel/arch/arm/lib/testclearbit.S | 15 ++ kernel/arch/arm/lib/testsetbit.S | 15 ++ kernel/arch/arm/lib/uaccess_with_memcpy.c | 270 ++++++++++++++++++++ kernel/arch/arm/lib/ucmpdi2.S | 53 ++++ kernel/arch/arm/lib/xor-neon.c | 46 ++++ 53 files changed, 5094 insertions(+) create mode 100644 kernel/arch/arm/lib/Makefile create mode 100644 kernel/arch/arm/lib/ashldi3.S create mode 100644 kernel/arch/arm/lib/ashrdi3.S create mode 100644 kernel/arch/arm/lib/backtrace.S create mode 100644 kernel/arch/arm/lib/bitops.h create mode 100644 kernel/arch/arm/lib/bswapsdi2.S create mode 100644 kernel/arch/arm/lib/call_with_stack.S create mode 100644 kernel/arch/arm/lib/changebit.S create mode 100644 kernel/arch/arm/lib/clear_user.S create mode 100644 kernel/arch/arm/lib/clearbit.S create mode 100644 kernel/arch/arm/lib/copy_from_user.S create mode 100644 kernel/arch/arm/lib/copy_page.S create mode 100644 kernel/arch/arm/lib/copy_template.S create mode 100644 kernel/arch/arm/lib/copy_to_user.S create mode 100644 kernel/arch/arm/lib/csumipv6.S create mode 100644 kernel/arch/arm/lib/csumpartial.S create mode 100644 kernel/arch/arm/lib/csumpartialcopy.S create mode 100644 kernel/arch/arm/lib/csumpartialcopygeneric.S create mode 100644 kernel/arch/arm/lib/csumpartialcopyuser.S create mode 100644 kernel/arch/arm/lib/delay-loop.S create mode 100644 kernel/arch/arm/lib/delay.c create mode 100644 kernel/arch/arm/lib/div64.S create mode 100644 kernel/arch/arm/lib/ecard.S create mode 100644 kernel/arch/arm/lib/findbit.S create mode 100644 kernel/arch/arm/lib/floppydma.S create mode 100644 kernel/arch/arm/lib/getuser.S create mode 100644 kernel/arch/arm/lib/io-acorn.S create mode 100644 kernel/arch/arm/lib/io-readsb.S create mode 100644 kernel/arch/arm/lib/io-readsl.S create mode 100644 kernel/arch/arm/lib/io-readsw-armv3.S create mode 100644 kernel/arch/arm/lib/io-readsw-armv4.S create mode 100644 kernel/arch/arm/lib/io-writesb.S create mode 100644 kernel/arch/arm/lib/io-writesl.S create mode 100644 kernel/arch/arm/lib/io-writesw-armv3.S create mode 100644 kernel/arch/arm/lib/io-writesw-armv4.S create mode 100644 kernel/arch/arm/lib/lib1funcs.S create mode 100644 kernel/arch/arm/lib/lshrdi3.S create mode 100644 kernel/arch/arm/lib/memchr.S create mode 100644 kernel/arch/arm/lib/memcpy.S create mode 100644 kernel/arch/arm/lib/memmove.S create mode 100644 kernel/arch/arm/lib/memset.S create mode 100644 kernel/arch/arm/lib/memzero.S create mode 100644 kernel/arch/arm/lib/muldi3.S create mode 100644 kernel/arch/arm/lib/putuser.S create mode 100644 kernel/arch/arm/lib/setbit.S create mode 100644 kernel/arch/arm/lib/strchr.S create mode 100644 kernel/arch/arm/lib/strrchr.S create mode 100644 kernel/arch/arm/lib/testchangebit.S create mode 100644 kernel/arch/arm/lib/testclearbit.S create mode 100644 kernel/arch/arm/lib/testsetbit.S create mode 100644 kernel/arch/arm/lib/uaccess_with_memcpy.c create mode 100644 kernel/arch/arm/lib/ucmpdi2.S create mode 100644 kernel/arch/arm/lib/xor-neon.c (limited to 'kernel/arch/arm/lib') diff --git a/kernel/arch/arm/lib/Makefile b/kernel/arch/arm/lib/Makefile new file mode 100644 index 000000000..d8a780799 --- /dev/null +++ b/kernel/arch/arm/lib/Makefile @@ -0,0 +1,41 @@ +# +# linux/arch/arm/lib/Makefile +# +# Copyright (C) 1995-2000 Russell King +# + +lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ + csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ + delay.o delay-loop.o findbit.o memchr.o memcpy.o \ + memmove.o memset.o memzero.o setbit.o \ + strchr.o strrchr.o \ + testchangebit.o testclearbit.o testsetbit.o \ + ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ + ucmpdi2.o lib1funcs.o div64.o \ + io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ + call_with_stack.o bswapsdi2.o + +mmu-y := clear_user.o copy_page.o getuser.o putuser.o \ + copy_from_user.o copy_to_user.o + +# using lib_ here won't override already available weak symbols +obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o + +lib-$(CONFIG_MMU) += $(mmu-y) + +ifeq ($(CONFIG_CPU_32v3),y) + lib-y += io-readsw-armv3.o io-writesw-armv3.o +else + lib-y += io-readsw-armv4.o io-writesw-armv4.o +endif + +lib-$(CONFIG_ARCH_RPC) += ecard.o io-acorn.o floppydma.o + +$(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S +$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S + +ifeq ($(CONFIG_KERNEL_MODE_NEON),y) + NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon + CFLAGS_xor-neon.o += $(NEON_FLAGS) + obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o +endif diff --git a/kernel/arch/arm/lib/ashldi3.S b/kernel/arch/arm/lib/ashldi3.S new file mode 100644 index 000000000..b05e95840 --- /dev/null +++ b/kernel/arch/arm/lib/ashldi3.S @@ -0,0 +1,54 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + + +#include +#include + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +ENTRY(__ashldi3) +ENTRY(__aeabi_llsl) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi ah, ah, lsl r2 + movpl ah, al, lsl r3 + ARM( orrmi ah, ah, al, lsr ip ) + THUMB( lsrmi r3, al, ip ) + THUMB( orrmi ah, ah, r3 ) + mov al, al, lsl r2 + ret lr + +ENDPROC(__ashldi3) +ENDPROC(__aeabi_llsl) diff --git a/kernel/arch/arm/lib/ashrdi3.S b/kernel/arch/arm/lib/ashrdi3.S new file mode 100644 index 000000000..275d7d234 --- /dev/null +++ b/kernel/arch/arm/lib/ashrdi3.S @@ -0,0 +1,54 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + + +#include +#include + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +ENTRY(__ashrdi3) +ENTRY(__aeabi_lasr) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, asr r3 + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) + mov ah, ah, asr r2 + ret lr + +ENDPROC(__ashrdi3) +ENDPROC(__aeabi_lasr) diff --git a/kernel/arch/arm/lib/backtrace.S b/kernel/arch/arm/lib/backtrace.S new file mode 100644 index 000000000..fab5a5050 --- /dev/null +++ b/kernel/arch/arm/lib/backtrace.S @@ -0,0 +1,152 @@ +/* + * linux/arch/arm/lib/backtrace.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include +#include + .text + +@ fp is 0 or stack frame + +#define frame r4 +#define sv_fp r5 +#define sv_pc r6 +#define mask r7 +#define offset r8 + +ENTRY(c_backtrace) + +#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) + ret lr +ENDPROC(c_backtrace) +#else + stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location... + movs frame, r0 @ if frame pointer is zero + beq no_frame @ we have no stack frames + + tst r1, #0x10 @ 26 or 32-bit mode? + ARM( moveq mask, #0xfc000003 ) + THUMB( moveq mask, #0xfc000000 ) + THUMB( orreq mask, #0x03 ) + movne mask, #0 @ mask for 32-bit + +1: stmfd sp!, {pc} @ calculate offset of PC stored + ldr r0, [sp], #4 @ by stmfd for this CPU + adr r1, 1b + sub offset, r0, r1 + +/* + * Stack frame layout: + * optionally saved caller registers (r4 - r10) + * saved fp + * saved sp + * saved lr + * frame => saved pc + * optionally saved arguments (r0 - r3) + * saved sp => + * + * Functions start with the following code sequence: + * mov ip, sp + * stmfd sp!, {r0 - r3} (optional) + * corrected pc => stmfd sp!, {..., fp, ip, lr, pc} + */ +for_each_frame: tst frame, mask @ Check for address exceptions + bne no_frame + +1001: ldr sv_pc, [frame, #0] @ get saved pc +1002: ldr sv_fp, [frame, #-12] @ get saved fp + + sub sv_pc, sv_pc, offset @ Correct PC for prefetching + bic sv_pc, sv_pc, mask @ mask PC/LR for the mode + +1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists, + ldr r3, .Ldsi+4 @ adjust saved 'pc' back one + teq r3, r2, lsr #10 @ instruction + subne r0, sv_pc, #4 @ allow for mov + subeq r0, sv_pc, #8 @ allow for mov + stmia + + ldr r1, [frame, #-4] @ get saved lr + mov r2, frame + bic r1, r1, mask @ mask PC/LR for the mode + bl dump_backtrace_entry + + ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists, + ldr r3, .Ldsi+4 + teq r3, r1, lsr #11 + ldreq r0, [frame, #-8] @ get sp + subeq r0, r0, #4 @ point at the last arg + bleq .Ldumpstm @ dump saved registers + +1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} + ldr r3, .Ldsi @ instruction exists, + teq r3, r1, lsr #11 + subeq r0, frame, #16 + bleq .Ldumpstm @ dump saved registers + + teq sv_fp, #0 @ zero saved fp means + beq no_frame @ no further frames + + cmp sv_fp, frame @ next frame must be + mov frame, sv_fp @ above the current frame + bhi for_each_frame + +1006: adr r0, .Lbad + mov r1, frame + bl printk +no_frame: ldmfd sp!, {r4 - r8, pc} +ENDPROC(c_backtrace) + + .pushsection __ex_table,"a" + .align 3 + .long 1001b, 1006b + .long 1002b, 1006b + .long 1003b, 1006b + .long 1004b, 1006b + .popsection + +#define instr r4 +#define reg r5 +#define stack r6 + +.Ldumpstm: stmfd sp!, {instr, reg, stack, r7, lr} + mov stack, r0 + mov instr, r1 + mov reg, #10 + mov r7, #0 +1: mov r3, #1 + ARM( tst instr, r3, lsl reg ) + THUMB( lsl r3, reg ) + THUMB( tst instr, r3 ) + beq 2f + add r7, r7, #1 + teq r7, #6 + moveq r7, #0 + adr r3, .Lcr + addne r3, r3, #1 @ skip newline + ldr r2, [stack], #-4 + mov r1, reg + adr r0, .Lfp + bl printk +2: subs reg, reg, #1 + bpl 1b + teq r7, #0 + adrne r0, .Lcr + blne printk + ldmfd sp!, {instr, reg, stack, r7, pc} + +.Lfp: .asciz " r%d:%08x%s" +.Lcr: .asciz "\n" +.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" + .align +.Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc} + .word 0xe92d0000 >> 11 @ stmfd sp!, {} + +#endif diff --git a/kernel/arch/arm/lib/bitops.h b/kernel/arch/arm/lib/bitops.h new file mode 100644 index 000000000..7d807cfd8 --- /dev/null +++ b/kernel/arch/arm/lib/bitops.h @@ -0,0 +1,106 @@ +#include +#include + +#if __LINUX_ARM_ARCH__ >= 6 + .macro bitop, name, instr +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) + .arch_extension mp + ALT_SMP(W(pldw) [r1]) + ALT_UP(W(nop)) +#endif + mov r3, r2, lsl r3 +1: ldrex r2, [r1] + \instr r2, r2, r3 + strex r0, r2, [r1] + cmp r0, #0 + bne 1b + bx lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm + + .macro testop, name, instr, store +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset + mov r3, r2, lsl r3 @ create mask + smp_dmb +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) + .arch_extension mp + ALT_SMP(W(pldw) [r1]) + ALT_UP(W(nop)) +#endif +1: ldrex r2, [r1] + ands r0, r2, r3 @ save old value of bit + \instr r2, r2, r3 @ toggle bit + strex ip, r2, [r1] + cmp ip, #0 + bne 1b + smp_dmb + cmp r0, #0 + movne r0, #1 +2: bx lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm +#else + .macro bitop, name, instr +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r2, r0, #31 + mov r0, r0, lsr #5 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2] + \instr r2, r2, r3 + str r2, [r1, r0, lsl #2] + restore_irqs ip + ret lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm + +/** + * testop - implement a test_and_xxx_bit operation. + * @instr: operational instruction + * @store: store instruction + * + * Note: we can trivially conditionalise the store instruction + * to avoid dirtying the data cache. + */ + .macro testop, name, instr, store +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r3, r0, #31 + mov r0, r0, lsr #5 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2]! + mov r0, #1 + tst r2, r0, lsl r3 + \instr r2, r2, r0, lsl r3 + \store r2, [r1] + moveq r0, #0 + restore_irqs ip + ret lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm +#endif diff --git a/kernel/arch/arm/lib/bswapsdi2.S b/kernel/arch/arm/lib/bswapsdi2.S new file mode 100644 index 000000000..07cda737b --- /dev/null +++ b/kernel/arch/arm/lib/bswapsdi2.S @@ -0,0 +1,37 @@ +#include +#include + +#if __LINUX_ARM_ARCH__ >= 6 +ENTRY(__bswapsi2) + rev r0, r0 + bx lr +ENDPROC(__bswapsi2) + +ENTRY(__bswapdi2) + rev r3, r0 + rev r0, r1 + mov r1, r3 + bx lr +ENDPROC(__bswapdi2) +#else +ENTRY(__bswapsi2) + eor r3, r0, r0, ror #16 + mov r3, r3, lsr #8 + bic r3, r3, #0xff00 + eor r0, r3, r0, ror #8 + ret lr +ENDPROC(__bswapsi2) + +ENTRY(__bswapdi2) + mov ip, r1 + eor r3, ip, ip, ror #16 + eor r1, r0, r0, ror #16 + mov r1, r1, lsr #8 + mov r3, r3, lsr #8 + bic r3, r3, #0xff00 + bic r1, r1, #0xff00 + eor r1, r1, r0, ror #8 + eor r0, r3, ip, ror #8 + ret lr +ENDPROC(__bswapdi2) +#endif diff --git a/kernel/arch/arm/lib/call_with_stack.S b/kernel/arch/arm/lib/call_with_stack.S new file mode 100644 index 000000000..ed1a42181 --- /dev/null +++ b/kernel/arch/arm/lib/call_with_stack.S @@ -0,0 +1,44 @@ +/* + * arch/arm/lib/call_with_stack.S + * + * Copyright (C) 2011 ARM Ltd. + * Written by Will Deacon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include + +/* + * void call_with_stack(void (*fn)(void *), void *arg, void *sp) + * + * Change the stack to that pointed at by sp, then invoke fn(arg) with + * the new stack. + */ +ENTRY(call_with_stack) + str sp, [r2, #-4]! + str lr, [r2, #-4]! + + mov sp, r2 + mov r2, r0 + mov r0, r1 + + adr lr, BSYM(1f) + ret r2 + +1: ldr lr, [sp] + ldr sp, [sp, #4] + ret lr +ENDPROC(call_with_stack) diff --git a/kernel/arch/arm/lib/changebit.S b/kernel/arch/arm/lib/changebit.S new file mode 100644 index 000000000..f40278621 --- /dev/null +++ b/kernel/arch/arm/lib/changebit.S @@ -0,0 +1,15 @@ +/* + * linux/arch/arm/lib/changebit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include "bitops.h" + .text + +bitop _change_bit, eor diff --git a/kernel/arch/arm/lib/clear_user.S b/kernel/arch/arm/lib/clear_user.S new file mode 100644 index 000000000..1710fd7db --- /dev/null +++ b/kernel/arch/arm/lib/clear_user.S @@ -0,0 +1,54 @@ +/* + * linux/arch/arm/lib/clear_user.S + * + * Copyright (C) 1995, 1996,1997,1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + + .text + +/* Prototype: int __clear_user(void *addr, size_t sz) + * Purpose : clear some user memory + * Params : addr - user memory address to clear + * : sz - number of bytes to clear + * Returns : number of bytes NOT cleared + */ +ENTRY(__clear_user_std) +WEAK(__clear_user) + stmfd sp!, {r1, lr} + mov r2, #0 + cmp r1, #4 + blt 2f + ands ip, r0, #3 + beq 1f + cmp ip, #2 + strusr r2, r0, 1 + strusr r2, r0, 1, le + strusr r2, r0, 1, lt + rsb ip, ip, #4 + sub r1, r1, ip @ 7 6 5 4 3 2 1 +1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7 + strusr r2, r0, 4, pl, rept=2 + bpl 1b + adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3 + strusr r2, r0, 4, pl +2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x + strusr r2, r0, 1, ne, rept=2 + tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 + it ne @ explicit IT needed for the label +USER( strnebt r2, [r0]) + mov r0, #0 + ldmfd sp!, {r1, pc} +ENDPROC(__clear_user) +ENDPROC(__clear_user_std) + + .pushsection .text.fixup,"ax" + .align 0 +9001: ldmfd sp!, {r0, pc} + .popsection + diff --git a/kernel/arch/arm/lib/clearbit.S b/kernel/arch/arm/lib/clearbit.S new file mode 100644 index 000000000..f6b75fb64 --- /dev/null +++ b/kernel/arch/arm/lib/clearbit.S @@ -0,0 +1,15 @@ +/* + * linux/arch/arm/lib/clearbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include "bitops.h" + .text + +bitop _clear_bit, bic diff --git a/kernel/arch/arm/lib/copy_from_user.S b/kernel/arch/arm/lib/copy_from_user.S new file mode 100644 index 000000000..7a235b995 --- /dev/null +++ b/kernel/arch/arm/lib/copy_from_user.S @@ -0,0 +1,109 @@ +/* + * linux/arch/arm/lib/copy_from_user.S + * + * Author: Nicolas Pitre + * Created: Sep 29, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +/* + * Prototype: + * + * size_t __copy_from_user(void *to, const void *from, size_t n) + * + * Purpose: + * + * copy a block to kernel memory from user memory + * + * Params: + * + * to = kernel memory + * from = user memory + * n = number of bytes to copy + * + * Return value: + * + * Number of bytes NOT copied. + */ + +#ifndef CONFIG_THUMB2_KERNEL +#define LDR1W_SHIFT 0 +#else +#define LDR1W_SHIFT 1 +#endif +#define STR1W_SHIFT 0 + + .macro ldr1w ptr reg abort + ldrusr \reg, \ptr, 4, abort=\abort + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldr1w \ptr, \reg1, \abort + ldr1w \ptr, \reg2, \abort + ldr1w \ptr, \reg3, \abort + ldr1w \ptr, \reg4, \abort + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldr4w \ptr, \reg1, \reg2, \reg3, \reg4, \abort + ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort + .endm + + .macro ldr1b ptr reg cond=al abort + ldrusr \reg, \ptr, 1, \cond, abort=\abort + .endm + + .macro str1w ptr reg abort + W(str) \reg, [\ptr], #4 + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro str1b ptr reg cond=al abort + str\cond\()b \reg, [\ptr], #1 + .endm + + .macro enter reg1 reg2 + mov r3, #0 + stmdb sp!, {r0, r2, r3, \reg1, \reg2} + .endm + + .macro usave reg1 reg2 + UNWIND( .save {r0, r2, r3, \reg1, \reg2} ) + .endm + + .macro exit reg1 reg2 + add sp, sp, #8 + ldmfd sp!, {r0, \reg1, \reg2} + .endm + + .text + +ENTRY(__copy_from_user) + +#include "copy_template.S" + +ENDPROC(__copy_from_user) + + .pushsection .fixup,"ax" + .align 0 + copy_abort_preamble + ldmfd sp!, {r1, r2} + sub r3, r0, r1 + rsb r1, r3, r2 + str r1, [sp] + bl __memzero + ldr r0, [sp], #4 + copy_abort_end + .popsection + diff --git a/kernel/arch/arm/lib/copy_page.S b/kernel/arch/arm/lib/copy_page.S new file mode 100644 index 000000000..6ee2f6706 --- /dev/null +++ b/kernel/arch/arm/lib/copy_page.S @@ -0,0 +1,47 @@ +/* + * linux/arch/arm/lib/copypage.S + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include +#include +#include +#include + +#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 )) + + .text + .align 5 +/* + * StrongARM optimised copy_page routine + * now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s) + * Note that we probably achieve closer to the 100MB/s target with + * the core clock switching. + */ +ENTRY(copy_page) + stmfd sp!, {r4, lr} @ 2 + PLD( pld [r1, #0] ) + PLD( pld [r1, #L1_CACHE_BYTES] ) + mov r2, #COPY_COUNT @ 1 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 +1: PLD( pld [r1, #2 * L1_CACHE_BYTES]) + PLD( pld [r1, #3 * L1_CACHE_BYTES]) +2: + .rept (2 * L1_CACHE_BYTES / 16 - 1) + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4 + .endr + subs r2, r2, #1 @ 1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmgtia r1!, {r3, r4, ip, lr} @ 4 + bgt 1b @ 1 + PLD( ldmeqia r1!, {r3, r4, ip, lr} ) + PLD( beq 2b ) + ldmfd sp!, {r4, pc} @ 3 +ENDPROC(copy_page) diff --git a/kernel/arch/arm/lib/copy_template.S b/kernel/arch/arm/lib/copy_template.S new file mode 100644 index 000000000..652e4d98c --- /dev/null +++ b/kernel/arch/arm/lib/copy_template.S @@ -0,0 +1,297 @@ +/* + * linux/arch/arm/lib/copy_template.s + * + * Code template for optimized memory copy functions + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Theory of operation + * ------------------- + * + * This file provides the core code for a forward memory copy used in + * the implementation of memcopy(), copy_to_user() and copy_from_user(). + * + * The including file must define the following accessor macros + * according to the need of the given function: + * + * ldr1w ptr reg abort + * + * This loads one word from 'ptr', stores it in 'reg' and increments + * 'ptr' to the next word. The 'abort' argument is used for fixup tables. + * + * ldr4w ptr reg1 reg2 reg3 reg4 abort + * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + * + * This loads four or eight words starting from 'ptr', stores them + * in provided registers and increments 'ptr' past those words. + * The'abort' argument is used for fixup tables. + * + * ldr1b ptr reg cond abort + * + * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. + * It also must apply the condition code if provided, otherwise the + * "al" condition is assumed by default. + * + * str1w ptr reg abort + * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + * str1b ptr reg cond abort + * + * Same as their ldr* counterparts, but data is stored to 'ptr' location + * rather than being loaded. + * + * enter reg1 reg2 + * + * Preserve the provided registers on the stack plus any additional + * data as needed by the implementation including this code. Called + * upon code entry. + * + * usave reg1 reg2 + * + * Unwind annotation macro is corresponding for 'enter' macro. + * It tell unwinder that preserved some provided registers on the stack + * and additional data by a prior 'enter' macro. + * + * exit reg1 reg2 + * + * Restore registers with the values previously saved with the + * 'preserv' macro. Called upon code termination. + * + * LDR1W_SHIFT + * STR1W_SHIFT + * + * Correction to be applied to the "ip" register when branching into + * the ldr1w or str1w instructions (some of these macros may expand to + * than one 32bit instruction in Thumb-2) + */ + + + UNWIND( .fnstart ) + enter r4, lr + UNWIND( .fnend ) + + UNWIND( .fnstart ) + usave r4, lr @ in first stmdb block + + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #0] ) + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + UNWIND( .fnend ) + + UNWIND( .fnstart ) + usave r4, lr + UNWIND( .save {r5 - r8} ) @ in second stmfd block + blt 5f + + CALGN( ands ip, r0, #31 ) + CALGN( rsb r3, ip, #32 ) + CALGN( sbcnes r4, r3, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, r3 ) @ C gets set + CALGN( add pc, r4, ip ) + + PLD( pld [r1, #0] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 4f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +3: PLD( pld [r1, #124] ) +4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f + subs r2, r2, #32 + str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) + +5: ands ip, r2, #28 + rsb ip, ip, #32 +#if LDR1W_SHIFT > 0 + lsl ip, ip, #LDR1W_SHIFT +#endif + addne pc, pc, ip @ C is always clear here + b 7f +6: + .rept (1 << LDR1W_SHIFT) + W(nop) + .endr + ldr1w r1, r3, abort=20f + ldr1w r1, r4, abort=20f + ldr1w r1, r5, abort=20f + ldr1w r1, r6, abort=20f + ldr1w r1, r7, abort=20f + ldr1w r1, r8, abort=20f + ldr1w r1, lr, abort=20f + +#if LDR1W_SHIFT < STR1W_SHIFT + lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT +#elif LDR1W_SHIFT > STR1W_SHIFT + lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT +#endif + add pc, pc, ip + nop + .rept (1 << STR1W_SHIFT) + W(nop) + .endr + str1w r0, r3, abort=20f + str1w r0, r4, abort=20f + str1w r0, r5, abort=20f + str1w r0, r6, abort=20f + str1w r0, r7, abort=20f + str1w r0, r8, abort=20f + str1w r0, lr, abort=20f + + CALGN( bcs 2b ) + +7: ldmfd sp!, {r5 - r8} + UNWIND( .fnend ) @ end of second stmfd block + + UNWIND( .fnstart ) + usave r4, lr @ still in first stmdb block +8: movs r2, r2, lsl #31 + ldr1b r1, r3, ne, abort=21f + ldr1b r1, r4, cs, abort=21f + ldr1b r1, ip, cs, abort=21f + str1b r0, r3, ne, abort=21f + str1b r0, r4, cs, abort=21f + str1b r0, ip, cs, abort=21f + + exit r4, pc + +9: rsb ip, ip, #4 + cmp ip, #2 + ldr1b r1, r3, gt, abort=21f + ldr1b r1, r4, ge, abort=21f + ldr1b r1, lr, abort=21f + str1b r0, r3, gt, abort=21f + str1b r0, r4, ge, abort=21f + subs r2, r2, ip + str1b r0, lr, abort=21f + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr1w r1, lr, abort=21f + beq 17f + bgt 18f + UNWIND( .fnend ) + + + .macro forward_copy_shift pull push + + UNWIND( .fnstart ) + usave r4, lr @ still in first stmdb block + subs r2, r2, #28 + blt 14f + + CALGN( ands ip, r0, #31 ) + CALGN( rsb ip, ip, #32 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) + +11: stmfd sp!, {r5 - r9} + UNWIND( .fnend ) + + UNWIND( .fnstart ) + usave r4, lr + UNWIND( .save {r5 - r9} ) @ in new second stmfd block + PLD( pld [r1, #0] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 13f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +12: PLD( pld [r1, #124] ) +13: ldr4w r1, r4, r5, r6, r7, abort=19f + mov r3, lr, lspull #\pull + subs r2, r2, #32 + ldr4w r1, r8, r9, ip, lr, abort=19f + orr r3, r3, r4, lspush #\push + mov r4, r4, lspull #\pull + orr r4, r4, r5, lspush #\push + mov r5, r5, lspull #\pull + orr r5, r5, r6, lspush #\push + mov r6, r6, lspull #\pull + orr r6, r6, r7, lspush #\push + mov r7, r7, lspull #\pull + orr r7, r7, r8, lspush #\push + mov r8, r8, lspull #\pull + orr r8, r8, r9, lspush #\push + mov r9, r9, lspull #\pull + orr r9, r9, ip, lspush #\push + mov ip, ip, lspull #\pull + orr ip, ip, lr, lspush #\push + str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) + + ldmfd sp!, {r5 - r9} + UNWIND( .fnend ) @ end of the second stmfd block + + UNWIND( .fnstart ) + usave r4, lr @ still in first stmdb block +14: ands ip, r2, #28 + beq 16f + +15: mov r3, lr, lspull #\pull + ldr1w r1, lr, abort=21f + subs ip, ip, #4 + orr r3, r3, lr, lspush #\push + str1w r0, r3, abort=21f + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) + +16: sub r1, r1, #(\push / 8) + b 8b + UNWIND( .fnend ) + + .endm + + + forward_copy_shift pull=8 push=24 + +17: forward_copy_shift pull=16 push=16 + +18: forward_copy_shift pull=24 push=8 + + +/* + * Abort preamble and completion macros. + * If a fixup handler is required then those macros must surround it. + * It is assumed that the fixup code will handle the private part of + * the exit macro. + */ + + .macro copy_abort_preamble +19: ldmfd sp!, {r5 - r9} + b 21f +20: ldmfd sp!, {r5 - r8} +21: + .endm + + .macro copy_abort_end + ldmfd sp!, {r4, pc} + .endm + diff --git a/kernel/arch/arm/lib/copy_to_user.S b/kernel/arch/arm/lib/copy_to_user.S new file mode 100644 index 000000000..9648b0675 --- /dev/null +++ b/kernel/arch/arm/lib/copy_to_user.S @@ -0,0 +1,111 @@ +/* + * linux/arch/arm/lib/copy_to_user.S + * + * Author: Nicolas Pitre + * Created: Sep 29, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +/* + * Prototype: + * + * size_t __copy_to_user(void *to, const void *from, size_t n) + * + * Purpose: + * + * copy a block to user memory from kernel memory + * + * Params: + * + * to = user memory + * from = kernel memory + * n = number of bytes to copy + * + * Return value: + * + * Number of bytes NOT copied. + */ + +#define LDR1W_SHIFT 0 +#ifndef CONFIG_THUMB2_KERNEL +#define STR1W_SHIFT 0 +#else +#define STR1W_SHIFT 1 +#endif + + .macro ldr1w ptr reg abort + W(ldr) \reg, [\ptr], #4 + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro ldr1b ptr reg cond=al abort + ldr\cond\()b \reg, [\ptr], #1 + .endm + + .macro str1w ptr reg abort + strusr \reg, \ptr, 4, abort=\abort + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + str1w \ptr, \reg1, \abort + str1w \ptr, \reg2, \abort + str1w \ptr, \reg3, \abort + str1w \ptr, \reg4, \abort + str1w \ptr, \reg5, \abort + str1w \ptr, \reg6, \abort + str1w \ptr, \reg7, \abort + str1w \ptr, \reg8, \abort + .endm + + .macro str1b ptr reg cond=al abort + strusr \reg, \ptr, 1, \cond, abort=\abort + .endm + + .macro enter reg1 reg2 + mov r3, #0 + stmdb sp!, {r0, r2, r3, \reg1, \reg2} + .endm + + .macro usave reg1 reg2 + UNWIND( .save {r0, r2, r3, \reg1, \reg2} ) + .endm + + .macro exit reg1 reg2 + add sp, sp, #8 + ldmfd sp!, {r0, \reg1, \reg2} + .endm + + .text + +ENTRY(__copy_to_user_std) +WEAK(__copy_to_user) + +#include "copy_template.S" + +ENDPROC(__copy_to_user) +ENDPROC(__copy_to_user_std) + + .pushsection .text.fixup,"ax" + .align 0 + copy_abort_preamble + ldmfd sp!, {r1, r2, r3} + sub r0, r0, r1 + rsb r0, r0, r2 + copy_abort_end + .popsection + diff --git a/kernel/arch/arm/lib/csumipv6.S b/kernel/arch/arm/lib/csumipv6.S new file mode 100644 index 000000000..3ac6ef01b --- /dev/null +++ b/kernel/arch/arm/lib/csumipv6.S @@ -0,0 +1,33 @@ +/* + * linux/arch/arm/lib/csumipv6.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + + .text + +ENTRY(__csum_ipv6_magic) + str lr, [sp, #-4]! + adds ip, r2, r3 + ldmia r1, {r1 - r3, lr} + adcs ip, ip, r1 + adcs ip, ip, r2 + adcs ip, ip, r3 + adcs ip, ip, lr + ldmia r0, {r0 - r3} + adcs r0, ip, r0 + adcs r0, r0, r1 + adcs r0, r0, r2 + ldr r2, [sp, #4] + adcs r0, r0, r3 + adcs r0, r0, r2 + adcs r0, r0, #0 + ldmfd sp!, {pc} +ENDPROC(__csum_ipv6_magic) + diff --git a/kernel/arch/arm/lib/csumpartial.S b/kernel/arch/arm/lib/csumpartial.S new file mode 100644 index 000000000..984e0f29d --- /dev/null +++ b/kernel/arch/arm/lib/csumpartial.S @@ -0,0 +1,142 @@ +/* + * linux/arch/arm/lib/csumpartial.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + + .text + +/* + * Function: __u32 csum_partial(const char *src, int len, __u32 sum) + * Params : r0 = buffer, r1 = len, r2 = checksum + * Returns : r0 = new checksum + */ + +buf .req r0 +len .req r1 +sum .req r2 +td0 .req r3 +td1 .req r4 @ save before use +td2 .req r5 @ save before use +td3 .req lr + +.Lzero: mov r0, sum + add sp, sp, #4 + ldr pc, [sp], #4 + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.Lless8: teq len, #0 @ check for zero count + beq .Lzero + + /* we must have at least one byte. */ + tst buf, #1 @ odd address? + movne sum, sum, ror #8 + ldrneb td0, [buf], #1 + subne len, len, #1 + adcnes sum, sum, td0, put_byte_1 + +.Lless4: tst len, #6 + beq .Lless8_byte + + /* we are now half-word aligned */ + +.Lless8_wordlp: +#if __LINUX_ARM_ARCH__ >= 4 + ldrh td0, [buf], #2 + sub len, len, #2 +#else + ldrb td0, [buf], #1 + ldrb td3, [buf], #1 + sub len, len, #2 +#ifndef __ARMEB__ + orr td0, td0, td3, lsl #8 +#else + orr td0, td3, td0, lsl #8 +#endif +#endif + adcs sum, sum, td0 + tst len, #6 + bne .Lless8_wordlp + +.Lless8_byte: tst len, #1 @ odd number of bytes + ldrneb td0, [buf], #1 @ include last byte + adcnes sum, sum, td0, put_byte_0 @ update checksum + +.Ldone: adc r0, sum, #0 @ collect up the last carry + ldr td0, [sp], #4 + tst td0, #1 @ check buffer alignment + movne r0, r0, ror #8 @ rotate checksum by 8 bits + ldr pc, [sp], #4 @ return + +.Lnot_aligned: tst buf, #1 @ odd address + ldrneb td0, [buf], #1 @ make even + subne len, len, #1 + adcnes sum, sum, td0, put_byte_1 @ update checksum + + tst buf, #2 @ 32-bit aligned? +#if __LINUX_ARM_ARCH__ >= 4 + ldrneh td0, [buf], #2 @ make 32-bit aligned + subne len, len, #2 +#else + ldrneb td0, [buf], #1 + ldrneb ip, [buf], #1 + subne len, len, #2 +#ifndef __ARMEB__ + orrne td0, td0, ip, lsl #8 +#else + orrne td0, ip, td0, lsl #8 +#endif +#endif + adcnes sum, sum, td0 @ update checksum + ret lr + +ENTRY(csum_partial) + stmfd sp!, {buf, lr} + cmp len, #8 @ Ensure that we have at least + blo .Lless8 @ 8 bytes to copy. + + tst buf, #1 + movne sum, sum, ror #8 + + adds sum, sum, #0 @ C = 0 + tst buf, #3 @ Test destination alignment + blne .Lnot_aligned @ align destination, return here + +1: bics ip, len, #31 + beq 3f + + stmfd sp!, {r4 - r5} +2: ldmia buf!, {td0, td1, td2, td3} + adcs sum, sum, td0 + adcs sum, sum, td1 + adcs sum, sum, td2 + adcs sum, sum, td3 + ldmia buf!, {td0, td1, td2, td3} + adcs sum, sum, td0 + adcs sum, sum, td1 + adcs sum, sum, td2 + adcs sum, sum, td3 + sub ip, ip, #32 + teq ip, #0 + bne 2b + ldmfd sp!, {r4 - r5} + +3: tst len, #0x1c @ should not change C + beq .Lless4 + +4: ldr td0, [buf], #4 + sub len, len, #4 + adcs sum, sum, td0 + tst len, #0x1c + bne 4b + b .Lless4 +ENDPROC(csum_partial) diff --git a/kernel/arch/arm/lib/csumpartialcopy.S b/kernel/arch/arm/lib/csumpartialcopy.S new file mode 100644 index 000000000..d03fc71fc --- /dev/null +++ b/kernel/arch/arm/lib/csumpartialcopy.S @@ -0,0 +1,53 @@ +/* + * linux/arch/arm/lib/csumpartialcopy.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + + .text + +/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len, __u32 sum) + * Params : r0 = src, r1 = dst, r2 = len, r3 = checksum + * Returns : r0 = new checksum + */ + + .macro save_regs + stmfd sp!, {r1, r4 - r8, lr} + .endm + + .macro load_regs + ldmfd sp!, {r1, r4 - r8, pc} + .endm + + .macro load1b, reg1 + ldrb \reg1, [r0], #1 + .endm + + .macro load2b, reg1, reg2 + ldrb \reg1, [r0], #1 + ldrb \reg2, [r0], #1 + .endm + + .macro load1l, reg1 + ldr \reg1, [r0], #4 + .endm + + .macro load2l, reg1, reg2 + ldr \reg1, [r0], #4 + ldr \reg2, [r0], #4 + .endm + + .macro load4l, reg1, reg2, reg3, reg4 + ldmia r0!, {\reg1, \reg2, \reg3, \reg4} + .endm + +#define FN_ENTRY ENTRY(csum_partial_copy_nocheck) +#define FN_EXIT ENDPROC(csum_partial_copy_nocheck) + +#include "csumpartialcopygeneric.S" diff --git a/kernel/arch/arm/lib/csumpartialcopygeneric.S b/kernel/arch/arm/lib/csumpartialcopygeneric.S new file mode 100644 index 000000000..10b459096 --- /dev/null +++ b/kernel/arch/arm/lib/csumpartialcopygeneric.S @@ -0,0 +1,333 @@ +/* + * linux/arch/arm/lib/csumpartialcopygeneric.S + * + * Copyright (C) 1995-2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + +/* + * unsigned int + * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ) + * r0 = src, r1 = dst, r2 = len, r3 = sum + * Returns : r0 = checksum + * + * Note that 'tst' and 'teq' preserve the carry flag. + */ + +src .req r0 +dst .req r1 +len .req r2 +sum .req r3 + +.Lzero: mov r0, sum + load_regs + + /* + * Align an unaligned destination pointer. We know that + * we have >= 8 bytes here, so we don't need to check + * the length. Note that the source pointer hasn't been + * aligned yet. + */ +.Ldst_unaligned: + tst dst, #1 + beq .Ldst_16bit + + load1b ip + sub len, len, #1 + adcs sum, sum, ip, put_byte_1 @ update checksum + strb ip, [dst], #1 + tst dst, #2 + reteq lr @ dst is now 32bit aligned + +.Ldst_16bit: load2b r8, ip + sub len, len, #2 + adcs sum, sum, r8, put_byte_0 + strb r8, [dst], #1 + adcs sum, sum, ip, put_byte_1 + strb ip, [dst], #1 + ret lr @ dst is now 32bit aligned + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.Lless8: teq len, #0 @ check for zero count + beq .Lzero + + /* we must have at least one byte. */ + tst dst, #1 @ dst 16-bit aligned + beq .Lless8_aligned + + /* Align dst */ + load1b ip + sub len, len, #1 + adcs sum, sum, ip, put_byte_1 @ update checksum + strb ip, [dst], #1 + tst len, #6 + beq .Lless8_byteonly + +1: load2b r8, ip + sub len, len, #2 + adcs sum, sum, r8, put_byte_0 + strb r8, [dst], #1 + adcs sum, sum, ip, put_byte_1 + strb ip, [dst], #1 +.Lless8_aligned: + tst len, #6 + bne 1b +.Lless8_byteonly: + tst len, #1 + beq .Ldone + load1b r8 + adcs sum, sum, r8, put_byte_0 @ update checksum + strb r8, [dst], #1 + b .Ldone + +FN_ENTRY + save_regs + + cmp len, #8 @ Ensure that we have at least + blo .Lless8 @ 8 bytes to copy. + + adds sum, sum, #0 @ C = 0 + tst dst, #3 @ Test destination alignment + blne .Ldst_unaligned @ align destination, return here + + /* + * Ok, the dst pointer is now 32bit aligned, and we know + * that we must have more than 4 bytes to copy. Note + * that C contains the carry from the dst alignment above. + */ + + tst src, #3 @ Test source alignment + bne .Lsrc_not_aligned + + /* Routine for src & dst aligned */ + + bics ip, len, #15 + beq 2f + +1: load4l r4, r5, r6, r7 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + sub ip, ip, #16 + teq ip, #0 + bne 1b + +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r4, r5 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + tst ip, #4 + beq 4f + +3: load1l r4 + str r4, [dst], #4 + adcs sum, sum, r4 + +4: ands len, len, #3 + beq .Ldone + load1l r4 + tst len, #2 + mov r5, r4, get_byte_0 + beq .Lexit + adcs sum, sum, r4, lspush #16 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + mov r5, r4, get_byte_2 +.Lexit: tst len, #1 + strneb r5, [dst], #1 + andne r5, r5, #255 + adcnes sum, sum, r5, put_byte_0 + + /* + * If the dst pointer was not 16-bit aligned, we + * need to rotate the checksum here to get around + * the inefficient byte manipulations in the + * architecture independent code. + */ +.Ldone: adc r0, sum, #0 + ldr sum, [sp, #0] @ dst + tst sum, #1 + movne r0, r0, ror #8 + load_regs + +.Lsrc_not_aligned: + adc sum, sum, #0 @ include C from dst alignment + and ip, src, #3 + bic src, src, #3 + load1l r5 + cmp ip, #2 + beq .Lsrc2_aligned + bhi .Lsrc3_aligned + mov r4, r5, lspull #8 @ C = 0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 + mov r7, r7, lspull #8 + orr r7, r7, r8, lspush #24 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, lspull #8 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, lspull #8 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, lspush #24 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, lspull #8 +4: ands len, len, #3 + beq .Ldone + mov r5, r4, get_byte_0 + tst len, #2 + beq .Lexit + adcs sum, sum, r4, lspush #16 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + mov r5, r4, get_byte_2 + b .Lexit + +.Lsrc2_aligned: mov r4, r5, lspull #16 + adds sum, sum, #0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 + mov r7, r7, lspull #16 + orr r7, r7, r8, lspush #16 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, lspull #16 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, lspull #16 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, lspush #16 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, lspull #16 +4: ands len, len, #3 + beq .Ldone + mov r5, r4, get_byte_0 + tst len, #2 + beq .Lexit + adcs sum, sum, r4 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + tst len, #1 + beq .Ldone + load1b r5 + b .Lexit + +.Lsrc3_aligned: mov r4, r5, lspull #24 + adds sum, sum, #0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 + mov r7, r7, lspull #24 + orr r7, r7, r8, lspush #8 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, lspull #24 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, lspull #24 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, lspush #8 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, lspull #24 +4: ands len, len, #3 + beq .Ldone + mov r5, r4, get_byte_0 + tst len, #2 + beq .Lexit + strb r5, [dst], #1 + adcs sum, sum, r4 + load1l r4 + mov r5, r4, get_byte_0 + strb r5, [dst], #1 + adcs sum, sum, r4, lspush #24 + mov r5, r4, get_byte_1 + b .Lexit +FN_EXIT diff --git a/kernel/arch/arm/lib/csumpartialcopyuser.S b/kernel/arch/arm/lib/csumpartialcopyuser.S new file mode 100644 index 000000000..1d0957e61 --- /dev/null +++ b/kernel/arch/arm/lib/csumpartialcopyuser.S @@ -0,0 +1,83 @@ +/* + * linux/arch/arm/lib/csumpartialcopyuser.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include +#include +#include +#include + + .text + + .macro save_regs + stmfd sp!, {r1, r2, r4 - r8, lr} + .endm + + .macro load_regs + ldmfd sp!, {r1, r2, r4 - r8, pc} + .endm + + .macro load1b, reg1 + ldrusr \reg1, r0, 1 + .endm + + .macro load2b, reg1, reg2 + ldrusr \reg1, r0, 1 + ldrusr \reg2, r0, 1 + .endm + + .macro load1l, reg1 + ldrusr \reg1, r0, 4 + .endm + + .macro load2l, reg1, reg2 + ldrusr \reg1, r0, 4 + ldrusr \reg2, r0, 4 + .endm + + .macro load4l, reg1, reg2, reg3, reg4 + ldrusr \reg1, r0, 4 + ldrusr \reg2, r0, 4 + ldrusr \reg3, r0, 4 + ldrusr \reg4, r0, 4 + .endm + +/* + * unsigned int + * csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *err_ptr) + * r0 = src, r1 = dst, r2 = len, r3 = sum, [sp] = *err_ptr + * Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT + */ + +#define FN_ENTRY ENTRY(csum_partial_copy_from_user) +#define FN_EXIT ENDPROC(csum_partial_copy_from_user) + +#include "csumpartialcopygeneric.S" + +/* + * FIXME: minor buglet here + * We don't return the checksum for the data present in the buffer. To do + * so properly, we would have to add in whatever registers were loaded before + * the fault, which, with the current asm above is not predictable. + */ + .pushsection .text.fixup,"ax" + .align 4 +9001: mov r4, #-EFAULT + ldr r5, [sp, #8*4] @ *err_ptr + str r4, [r5] + ldmia sp, {r1, r2} @ retrieve dst, len + add r2, r2, r1 + mov r0, #0 @ zero the buffer +9002: teq r2, r1 + strneb r0, [r1], #1 + bne 9002b + load_regs + .popsection diff --git a/kernel/arch/arm/lib/delay-loop.S b/kernel/arch/arm/lib/delay-loop.S new file mode 100644 index 000000000..518bf6e93 --- /dev/null +++ b/kernel/arch/arm/lib/delay-loop.S @@ -0,0 +1,68 @@ +/* + * linux/arch/arm/lib/delay.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + .text + +.LC0: .word loops_per_jiffy +.LC1: .word UDELAY_MULT + +/* + * r0 <= 2000 + * lpj <= 0x01ffffff (max. 3355 bogomips) + * HZ <= 1000 + */ + +ENTRY(__loop_udelay) + ldr r2, .LC1 + mul r0, r2, r0 +ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06 + mov r1, #-1 + ldr r2, .LC0 + ldr r2, [r2] @ max = 0x01ffffff + add r0, r0, r1, lsr #32-14 + mov r0, r0, lsr #14 @ max = 0x0001ffff + add r2, r2, r1, lsr #32-10 + mov r2, r2, lsr #10 @ max = 0x00007fff + mul r0, r2, r0 @ max = 2^32-1 + add r0, r0, r1, lsr #32-6 + movs r0, r0, lsr #6 + reteq lr + +/* + * loops = r0 * HZ * loops_per_jiffy / 1000000 + */ + .align 3 + +@ Delay routine +ENTRY(__loop_delay) + subs r0, r0, #1 +#if 0 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 +#endif + bhi __loop_delay + ret lr +ENDPROC(__loop_udelay) +ENDPROC(__loop_const_udelay) +ENDPROC(__loop_delay) diff --git a/kernel/arch/arm/lib/delay.c b/kernel/arch/arm/lib/delay.c new file mode 100644 index 000000000..8044591dc --- /dev/null +++ b/kernel/arch/arm/lib/delay.c @@ -0,0 +1,117 @@ +/* + * Delay loops based on the OpenRISC implementation. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Will Deacon + */ + +#include +#include +#include +#include +#include +#include + +/* + * Default to the loop-based delay implementation. + */ +struct arm_delay_ops arm_delay_ops = { + .delay = __loop_delay, + .const_udelay = __loop_const_udelay, + .udelay = __loop_udelay, +}; + +static const struct delay_timer *delay_timer; +static bool delay_calibrated; +static u64 delay_res; + +int read_current_timer(unsigned long *timer_val) +{ + if (!delay_timer) + return -ENXIO; + + *timer_val = delay_timer->read_current_timer(); + return 0; +} +EXPORT_SYMBOL_GPL(read_current_timer); + +static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift) +{ + return (cyc * mult) >> shift; +} + +static void __timer_delay(unsigned long cycles) +{ + cycles_t start = get_cycles(); + + while ((get_cycles() - start) < cycles) + cpu_relax(); +} + +static void __timer_const_udelay(unsigned long xloops) +{ + unsigned long long loops = xloops; + loops *= arm_delay_ops.ticks_per_jiffy; + __timer_delay(loops >> UDELAY_SHIFT); +} + +static void __timer_udelay(unsigned long usecs) +{ + __timer_const_udelay(usecs * UDELAY_MULT); +} + +void __init register_current_timer_delay(const struct delay_timer *timer) +{ + u32 new_mult, new_shift; + u64 res; + + clocks_calc_mult_shift(&new_mult, &new_shift, timer->freq, + NSEC_PER_SEC, 3600); + res = cyc_to_ns(1ULL, new_mult, new_shift); + + if (res > 1000) { + pr_err("Ignoring delay timer %ps, which has insufficient resolution of %lluns\n", + timer, res); + return; + } + + if (!delay_calibrated && (!delay_res || (res < delay_res))) { + pr_info("Switching to timer-based delay loop, resolution %lluns\n", res); + delay_timer = timer; + lpj_fine = timer->freq / HZ; + delay_res = res; + + /* cpufreq may scale loops_per_jiffy, so keep a private copy */ + arm_delay_ops.ticks_per_jiffy = lpj_fine; + arm_delay_ops.delay = __timer_delay; + arm_delay_ops.const_udelay = __timer_const_udelay; + arm_delay_ops.udelay = __timer_udelay; + } else { + pr_info("Ignoring duplicate/late registration of read_current_timer delay\n"); + } +} + +unsigned long calibrate_delay_is_known(void) +{ + delay_calibrated = true; + return lpj_fine; +} + +void calibration_delay_done(void) +{ + delay_calibrated = true; +} diff --git a/kernel/arch/arm/lib/div64.S b/kernel/arch/arm/lib/div64.S new file mode 100644 index 000000000..a9eafe498 --- /dev/null +++ b/kernel/arch/arm/lib/div64.S @@ -0,0 +1,212 @@ +/* + * linux/arch/arm/lib/div64.S + * + * Optimized computation of 64-bit dividend / 32-bit divisor + * + * Author: Nicolas Pitre + * Created: Oct 5, 2003 + * Copyright: Monta Vista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +/* + * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. + * + * Note: Calling convention is totally non standard for optimal code. + * This is meant to be used by do_div() from include/asm/div64.h only. + * + * Input parameters: + * xh-xl = dividend (clobbered) + * r4 = divisor (preserved) + * + * Output values: + * yh-yl = result + * xh = remainder + * + * Clobbered regs: xl, ip + */ + +ENTRY(__do_div64) +UNWIND(.fnstart) + + @ Test for easy paths first. + subs ip, r4, #1 + bls 9f @ divisor is 0 or 1 + tst ip, r4 + beq 8f @ divisor is power of 2 + + @ See if we need to handle upper 32-bit result. + cmp xh, r4 + mov yh, #0 + blo 3f + + @ Align divisor with upper part of dividend. + @ The aligned divisor is stored in yl preserving the original. + @ The bit position is stored in ip. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz yl, r4 + clz ip, xh + sub yl, yl, ip + mov ip, #1 + mov ip, ip, lsl yl + mov yl, r4, lsl yl + +#else + + mov yl, r4 + mov ip, #1 +1: cmp yl, #0x80000000 + cmpcc yl, xh + movcc yl, yl, lsl #1 + movcc ip, ip, lsl #1 + bcc 1b + +#endif + + @ The division loop for needed upper bit positions. + @ Break out early if dividend reaches 0. +2: cmp xh, yl + orrcs yh, yh, ip + subcss xh, xh, yl + movnes ip, ip, lsr #1 + mov yl, yl, lsr #1 + bne 2b + + @ See if we need to handle lower 32-bit result. +3: cmp xh, #0 + mov yl, #0 + cmpeq xl, r4 + movlo xh, xl + retlo lr + + @ The division loop for lower bit positions. + @ Here we shift remainer bits leftwards rather than moving the + @ divisor for comparisons, considering the carry-out bit as well. + mov ip, #0x80000000 +4: movs xl, xl, lsl #1 + adcs xh, xh, xh + beq 6f + cmpcc xh, r4 +5: orrcs yl, yl, ip + subcs xh, xh, r4 + movs ip, ip, lsr #1 + bne 4b + ret lr + + @ The top part of remainder became zero. If carry is set + @ (the 33th bit) this is a false positive so resume the loop. + @ Otherwise, if lower part is also null then we are done. +6: bcs 5b + cmp xl, #0 + reteq lr + + @ We still have remainer bits in the low part. Bring them up. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz xh, xl @ we know xh is zero here so... + add xh, xh, #1 + mov xl, xl, lsl xh + mov ip, ip, lsr xh + +#else + +7: movs xl, xl, lsl #1 + mov ip, ip, lsr #1 + bcc 7b + +#endif + + @ Current remainder is now 1. It is worthless to compare with + @ divisor at this point since divisor can not be smaller than 3 here. + @ If possible, branch for another shift in the division loop. + @ If no bit position left then we are done. + movs ip, ip, lsr #1 + mov xh, #1 + bne 4b + ret lr + +8: @ Division by a power of 2: determine what that divisor order is + @ then simply shift values around + +#if __LINUX_ARM_ARCH__ >= 5 + + clz ip, r4 + rsb ip, ip, #31 + +#else + + mov yl, r4 + cmp r4, #(1 << 16) + mov ip, #0 + movhs yl, yl, lsr #16 + movhs ip, #16 + + cmp yl, #(1 << 8) + movhs yl, yl, lsr #8 + addhs ip, ip, #8 + + cmp yl, #(1 << 4) + movhs yl, yl, lsr #4 + addhs ip, ip, #4 + + cmp yl, #(1 << 2) + addhi ip, ip, #3 + addls ip, ip, yl, lsr #1 + +#endif + + mov yh, xh, lsr ip + mov yl, xl, lsr ip + rsb ip, ip, #32 + ARM( orr yl, yl, xh, lsl ip ) + THUMB( lsl xh, xh, ip ) + THUMB( orr yl, yl, xh ) + mov xh, xl, lsl ip + mov xh, xh, lsr ip + ret lr + + @ eq -> division by 1: obvious enough... +9: moveq yl, xl + moveq yh, xh + moveq xh, #0 + reteq lr +UNWIND(.fnend) + +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) +Ldiv0_64: + @ Division by 0: + str lr, [sp, #-8]! + bl __div0 + + @ as wrong as it could be... + mov yl, #0 + mov yh, #0 + mov xh, #0 + ldr pc, [sp], #8 + +UNWIND(.fnend) +ENDPROC(__do_div64) diff --git a/kernel/arch/arm/lib/ecard.S b/kernel/arch/arm/lib/ecard.S new file mode 100644 index 000000000..e6057fa85 --- /dev/null +++ b/kernel/arch/arm/lib/ecard.S @@ -0,0 +1,44 @@ +/* + * linux/arch/arm/lib/ecard.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include +#include + +#define CPSR2SPSR(rt) \ + mrs rt, cpsr; \ + msr spsr_cxsf, rt + +@ Purpose: call an expansion card loader to read bytes. +@ Proto : char read_loader(int offset, char *card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_read) + stmfd sp!, {r4 - r12, lr} + mov r11, r1 + mov r1, r0 + CPSR2SPSR(r0) + mov lr, pc + mov pc, r2 + ldmfd sp!, {r4 - r12, pc} + +@ Purpose: call an expansion card loader to reset the card +@ Proto : void read_loader(int card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_reset) + stmfd sp!, {r4 - r12, lr} + mov r11, r0 + CPSR2SPSR(r0) + mov lr, pc + add pc, r1, #8 + ldmfd sp!, {r4 - r12, pc} + diff --git a/kernel/arch/arm/lib/findbit.S b/kernel/arch/arm/lib/findbit.S new file mode 100644 index 000000000..7848780e8 --- /dev/null +++ b/kernel/arch/arm/lib/findbit.S @@ -0,0 +1,196 @@ +/* + * linux/arch/arm/lib/findbit.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16th March 2001 - John Ripley + * Fixed so that "size" is an exclusive not an inclusive quantity. + * All users of these functions expect exclusive sizes, and may + * also call with zero size. + * Reworked by rmk. + */ +#include +#include + .text + +/* + * Purpose : Find a 'zero' bit + * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); + */ +ENTRY(_find_first_zero_bit_le) + teq r1, #0 + beq 3f + mov r2, #0 +1: + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + ret lr +ENDPROC(_find_first_zero_bit_le) + +/* + * Purpose : Find next 'zero' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) + */ +ENTRY(_find_next_zero_bit_le) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_zero_bit_le) + +/* + * Purpose : Find a 'one' bit + * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit); + */ +ENTRY(_find_first_bit_le) + teq r1, #0 + beq 3f + mov r2, #0 +1: + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + ret lr +ENDPROC(_find_first_bit_le) + +/* + * Purpose : Find next 'one' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) + */ +ENTRY(_find_next_bit_le) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_bit_le) + +#ifdef __ARMEB__ + +ENTRY(_find_first_zero_bit_be) + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + ret lr +ENDPROC(_find_first_zero_bit_be) + +ENTRY(_find_next_zero_bit_be) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_zero_bit_be) + +ENTRY(_find_first_bit_be) + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + ret lr +ENDPROC(_find_first_bit_be) + +ENTRY(_find_next_bit_be) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_bit_be) + +#endif + +/* + * One or more bits in the LSB of r3 are assumed to be set. + */ +.L_found: +#if __LINUX_ARM_ARCH__ >= 5 + rsb r0, r3, #0 + and r3, r3, r0 + clz r3, r3 + rsb r3, r3, #31 + add r0, r2, r3 +#else + tst r3, #0x0f + addeq r2, r2, #4 + movne r3, r3, lsl #4 + tst r3, #0x30 + addeq r2, r2, #2 + movne r3, r3, lsl #2 + tst r3, #0x40 + addeq r2, r2, #1 + mov r0, r2 +#endif + cmp r1, r0 @ Clamp to maxbit + movlo r0, r1 + ret lr + diff --git a/kernel/arch/arm/lib/floppydma.S b/kernel/arch/arm/lib/floppydma.S new file mode 100644 index 000000000..617150b1b --- /dev/null +++ b/kernel/arch/arm/lib/floppydma.S @@ -0,0 +1,32 @@ +/* + * linux/arch/arm/lib/floppydma.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + .text + + .global floppy_fiqin_end +ENTRY(floppy_fiqin_start) + subs r9, r9, #1 + ldrgtb r12, [r11, #-4] + ldrleb r12, [r11], #0 + strb r12, [r10], #1 + subs pc, lr, #4 +floppy_fiqin_end: + + .global floppy_fiqout_end +ENTRY(floppy_fiqout_start) + subs r9, r9, #1 + ldrgeb r12, [r10], #1 + movlt r12, #0 + strleb r12, [r11], #0 + subles pc, lr, #4 + strb r12, [r11, #-4] + subs pc, lr, #4 +floppy_fiqout_end: diff --git a/kernel/arch/arm/lib/getuser.S b/kernel/arch/arm/lib/getuser.S new file mode 100644 index 000000000..8ecfd15c3 --- /dev/null +++ b/kernel/arch/arm/lib/getuser.S @@ -0,0 +1,149 @@ +/* + * linux/arch/arm/lib/getuser.S + * + * Copyright (C) 2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Idea from x86 version, (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface to make them more + * efficient, especially as they return an error value in addition to + * the "real" return value. + * + * __get_user_X + * + * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved + * Outputs: r0 is the error code + * r2, r3 contains the zero-extended value + * lr corrupted + * + * No other registers must be altered. (see + * for specific ASM register usage). + * + * Note that ADDR_LIMIT is either 0 or 0xc0000000. + * Note also that it is intended that __get_user_bad is not global. + */ +#include +#include +#include +#include + +ENTRY(__get_user_1) + check_uaccess r0, 1, r1, r2, __get_user_bad +1: TUSER(ldrb) r2, [r0] + mov r0, #0 + ret lr +ENDPROC(__get_user_1) + +ENTRY(__get_user_2) + check_uaccess r0, 2, r1, r2, __get_user_bad +#ifdef CONFIG_CPU_USE_DOMAINS +rb .req ip +2: ldrbt r2, [r0], #1 +3: ldrbt rb, [r0], #0 +#else +rb .req r0 +2: ldrb r2, [r0] +3: ldrb rb, [r0, #1] +#endif +#ifndef __ARMEB__ + orr r2, r2, rb, lsl #8 +#else + orr r2, rb, r2, lsl #8 +#endif + mov r0, #0 + ret lr +ENDPROC(__get_user_2) + +ENTRY(__get_user_4) + check_uaccess r0, 4, r1, r2, __get_user_bad +4: TUSER(ldr) r2, [r0] + mov r0, #0 + ret lr +ENDPROC(__get_user_4) + +ENTRY(__get_user_8) + check_uaccess r0, 8, r1, r2, __get_user_bad +#ifdef CONFIG_THUMB2_KERNEL +5: TUSER(ldr) r2, [r0] +6: TUSER(ldr) r3, [r0, #4] +#else +5: TUSER(ldr) r2, [r0], #4 +6: TUSER(ldr) r3, [r0] +#endif + mov r0, #0 + ret lr +ENDPROC(__get_user_8) + +#ifdef __ARMEB__ +ENTRY(__get_user_32t_8) + check_uaccess r0, 8, r1, r2, __get_user_bad +#ifdef CONFIG_CPU_USE_DOMAINS + add r0, r0, #4 +7: ldrt r2, [r0] +#else +7: ldr r2, [r0, #4] +#endif + mov r0, #0 + ret lr +ENDPROC(__get_user_32t_8) + +ENTRY(__get_user_64t_1) + check_uaccess r0, 1, r1, r2, __get_user_bad8 +8: TUSER(ldrb) r3, [r0] + mov r0, #0 + ret lr +ENDPROC(__get_user_64t_1) + +ENTRY(__get_user_64t_2) + check_uaccess r0, 2, r1, r2, __get_user_bad8 +#ifdef CONFIG_CPU_USE_DOMAINS +rb .req ip +9: ldrbt r3, [r0], #1 +10: ldrbt rb, [r0], #0 +#else +rb .req r0 +9: ldrb r3, [r0] +10: ldrb rb, [r0, #1] +#endif + orr r3, rb, r3, lsl #8 + mov r0, #0 + ret lr +ENDPROC(__get_user_64t_2) + +ENTRY(__get_user_64t_4) + check_uaccess r0, 4, r1, r2, __get_user_bad8 +11: TUSER(ldr) r3, [r0] + mov r0, #0 + ret lr +ENDPROC(__get_user_64t_4) +#endif + +__get_user_bad8: + mov r3, #0 +__get_user_bad: + mov r2, #0 + mov r0, #-EFAULT + ret lr +ENDPROC(__get_user_bad) +ENDPROC(__get_user_bad8) + +.pushsection __ex_table, "a" + .long 1b, __get_user_bad + .long 2b, __get_user_bad + .long 3b, __get_user_bad + .long 4b, __get_user_bad + .long 5b, __get_user_bad8 + .long 6b, __get_user_bad8 +#ifdef __ARMEB__ + .long 7b, __get_user_bad + .long 8b, __get_user_bad8 + .long 9b, __get_user_bad8 + .long 10b, __get_user_bad8 + .long 11b, __get_user_bad8 +#endif +.popsection diff --git a/kernel/arch/arm/lib/io-acorn.S b/kernel/arch/arm/lib/io-acorn.S new file mode 100644 index 000000000..69719bad6 --- /dev/null +++ b/kernel/arch/arm/lib/io-acorn.S @@ -0,0 +1,32 @@ +/* + * linux/arch/arm/lib/io-acorn.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include +#include +#include + + .text + .align + +.Liosl_warning: + .ascii KERN_WARNING "insl/outsl not implemented, called from %08lX\0" + .align + +/* + * These make no sense on Acorn machines. + * Print a warning message. + */ +ENTRY(insl) +ENTRY(outsl) + adr r0, .Liosl_warning + mov r1, lr + b printk diff --git a/kernel/arch/arm/lib/io-readsb.S b/kernel/arch/arm/lib/io-readsb.S new file mode 100644 index 000000000..c31b2f315 --- /dev/null +++ b/kernel/arch/arm/lib/io-readsb.S @@ -0,0 +1,123 @@ +/* + * linux/arch/arm/lib/io-readsb.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +.Linsb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1], #1 + subs r2, r2, ip + bne .Linsb_aligned + +ENTRY(__raw_readsb) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + ands ip, r1, #3 + bne .Linsb_align + +.Linsb_aligned: stmfd sp!, {r4 - r6, lr} + + subs r2, r2, #16 + bmi .Linsb_no_16 + +.Linsb_16_lp: ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + mov r3, r3, put_byte_0 + ldrb r6, [r0] + orr r3, r3, r4, put_byte_1 + ldrb r4, [r0] + orr r3, r3, r5, put_byte_2 + ldrb r5, [r0] + orr r3, r3, r6, put_byte_3 + ldrb r6, [r0] + mov r4, r4, put_byte_0 + ldrb ip, [r0] + orr r4, r4, r5, put_byte_1 + ldrb r5, [r0] + orr r4, r4, r6, put_byte_2 + ldrb r6, [r0] + orr r4, r4, ip, put_byte_3 + ldrb ip, [r0] + mov r5, r5, put_byte_0 + ldrb lr, [r0] + orr r5, r5, r6, put_byte_1 + ldrb r6, [r0] + orr r5, r5, ip, put_byte_2 + ldrb ip, [r0] + orr r5, r5, lr, put_byte_3 + ldrb lr, [r0] + mov r6, r6, put_byte_0 + orr r6, r6, ip, put_byte_1 + ldrb ip, [r0] + orr r6, r6, lr, put_byte_2 + orr r6, r6, ip, put_byte_3 + stmia r1!, {r3 - r6} + + subs r2, r2, #16 + bpl .Linsb_16_lp + + tst r2, #15 + ldmeqfd sp!, {r4 - r6, pc} + +.Linsb_no_16: tst r2, #8 + beq .Linsb_no_8 + + ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + mov r3, r3, put_byte_0 + ldrb r6, [r0] + orr r3, r3, r4, put_byte_1 + ldrb r4, [r0] + orr r3, r3, r5, put_byte_2 + ldrb r5, [r0] + orr r3, r3, r6, put_byte_3 + ldrb r6, [r0] + mov r4, r4, put_byte_0 + ldrb ip, [r0] + orr r4, r4, r5, put_byte_1 + orr r4, r4, r6, put_byte_2 + orr r4, r4, ip, put_byte_3 + stmia r1!, {r3, r4} + +.Linsb_no_8: tst r2, #4 + beq .Linsb_no_4 + + ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + ldrb r6, [r0] + mov r3, r3, put_byte_0 + orr r3, r3, r4, put_byte_1 + orr r3, r3, r5, put_byte_2 + orr r3, r3, r6, put_byte_3 + str r3, [r1], #4 + +.Linsb_no_4: ands r2, r2, #3 + ldmeqfd sp!, {r4 - r6, pc} + + cmp r2, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1] + + ldmfd sp!, {r4 - r6, pc} +ENDPROC(__raw_readsb) diff --git a/kernel/arch/arm/lib/io-readsl.S b/kernel/arch/arm/lib/io-readsl.S new file mode 100644 index 000000000..2ed86fa54 --- /dev/null +++ b/kernel/arch/arm/lib/io-readsl.S @@ -0,0 +1,79 @@ +/* + * linux/arch/arm/lib/io-readsl.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +ENTRY(__raw_readsl) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + ands ip, r1, #3 + bne 3f + + subs r2, r2, #4 + bmi 2f + stmfd sp!, {r4, lr} +1: ldr r3, [r0, #0] + ldr r4, [r0, #0] + ldr ip, [r0, #0] + ldr lr, [r0, #0] + subs r2, r2, #4 + stmia r1!, {r3, r4, ip, lr} + bpl 1b + ldmfd sp!, {r4, lr} +2: movs r2, r2, lsl #31 + ldrcs r3, [r0, #0] + ldrcs ip, [r0, #0] + stmcsia r1!, {r3, ip} + ldrne r3, [r0, #0] + strne r3, [r1, #0] + ret lr + +3: ldr r3, [r0] + cmp ip, #2 + mov ip, r3, get_byte_0 + strb ip, [r1], #1 + bgt 6f + mov ip, r3, get_byte_1 + strb ip, [r1], #1 + beq 5f + mov ip, r3, get_byte_2 + strb ip, [r1], #1 + +4: subs r2, r2, #1 + mov ip, r3, lspull #24 + ldrne r3, [r0] + orrne ip, ip, r3, lspush #8 + strne ip, [r1], #4 + bne 4b + b 8f + +5: subs r2, r2, #1 + mov ip, r3, lspull #16 + ldrne r3, [r0] + orrne ip, ip, r3, lspush #16 + strne ip, [r1], #4 + bne 5b + b 7f + +6: subs r2, r2, #1 + mov ip, r3, lspull #8 + ldrne r3, [r0] + orrne ip, ip, r3, lspush #24 + strne ip, [r1], #4 + bne 6b + + mov r3, ip, get_byte_2 + strb r3, [r1, #2] +7: mov r3, ip, get_byte_1 + strb r3, [r1, #1] +8: mov r3, ip, get_byte_0 + strb r3, [r1, #0] + ret lr +ENDPROC(__raw_readsl) diff --git a/kernel/arch/arm/lib/io-readsw-armv3.S b/kernel/arch/arm/lib/io-readsw-armv3.S new file mode 100644 index 000000000..413da9914 --- /dev/null +++ b/kernel/arch/arm/lib/io-readsw-armv3.S @@ -0,0 +1,106 @@ +/* + * linux/arch/arm/lib/io-readsw-armv3.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +.Linsw_bad_alignment: + adr r0, .Linsw_bad_align_msg + mov r2, lr + b panic +.Linsw_bad_align_msg: + .asciz "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.Linsw_align: tst r1, #1 + bne .Linsw_bad_alignment + + ldr r3, [r0] + strb r3, [r1], #1 + mov r3, r3, lsr #8 + strb r3, [r1], #1 + + subs r2, r2, #1 + reteq lr + +ENTRY(__raw_readsw) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + tst r1, #3 + bne .Linsw_align + +.Linsw_aligned: mov ip, #0xff + orr ip, ip, ip, lsl #8 + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .Lno_insw_8 + +.Linsw_8_lp: ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + ldr r5, [r0] + and r5, r5, ip + ldr r6, [r0] + orr r5, r5, r6, lsl #16 + + ldr r6, [r0] + and r6, r6, ip + ldr lr, [r0] + orr r6, r6, lr, lsl #16 + + stmia r1!, {r3 - r6} + + subs r2, r2, #8 + bpl .Linsw_8_lp + + tst r2, #7 + ldmeqfd sp!, {r4, r5, r6, pc} + +.Lno_insw_8: tst r2, #4 + beq .Lno_insw_4 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + stmia r1!, {r3, r4} + +.Lno_insw_4: tst r2, #2 + beq .Lno_insw_2 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + str r3, [r1], #4 + +.Lno_insw_2: tst r2, #1 + ldrne r3, [r0] + strneb r3, [r1], #1 + movne r3, r3, lsr #8 + strneb r3, [r1] + + ldmfd sp!, {r4, r5, r6, pc} + + diff --git a/kernel/arch/arm/lib/io-readsw-armv4.S b/kernel/arch/arm/lib/io-readsw-armv4.S new file mode 100644 index 000000000..d9a45e969 --- /dev/null +++ b/kernel/arch/arm/lib/io-readsw-armv4.S @@ -0,0 +1,131 @@ +/* + * linux/arch/arm/lib/io-readsw-armv4.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + + .macro pack, rd, hw1, hw2 +#ifndef __ARMEB__ + orr \rd, \hw1, \hw2, lsl #16 +#else + orr \rd, \hw2, \hw1, lsl #16 +#endif + .endm + +.Linsw_align: movs ip, r1, lsl #31 + bne .Linsw_noalign + ldrh ip, [r0] + sub r2, r2, #1 + strh ip, [r1], #2 + +ENTRY(__raw_readsw) + teq r2, #0 + reteq lr + tst r1, #3 + bne .Linsw_align + + stmfd sp!, {r4, r5, lr} + + subs r2, r2, #8 + bmi .Lno_insw_8 + +.Linsw_8_lp: ldrh r3, [r0] + ldrh r4, [r0] + pack r3, r3, r4 + + ldrh r4, [r0] + ldrh r5, [r0] + pack r4, r4, r5 + + ldrh r5, [r0] + ldrh ip, [r0] + pack r5, r5, ip + + ldrh ip, [r0] + ldrh lr, [r0] + pack ip, ip, lr + + subs r2, r2, #8 + stmia r1!, {r3 - r5, ip} + bpl .Linsw_8_lp + +.Lno_insw_8: tst r2, #4 + beq .Lno_insw_4 + + ldrh r3, [r0] + ldrh r4, [r0] + pack r3, r3, r4 + + ldrh r4, [r0] + ldrh ip, [r0] + pack r4, r4, ip + + stmia r1!, {r3, r4} + +.Lno_insw_4: movs r2, r2, lsl #31 + bcc .Lno_insw_2 + + ldrh r3, [r0] + ldrh ip, [r0] + pack r3, r3, ip + str r3, [r1], #4 + +.Lno_insw_2: ldrneh r3, [r0] + strneh r3, [r1] + + ldmfd sp!, {r4, r5, pc} + +#ifdef __ARMEB__ +#define _BE_ONLY_(code...) code +#define _LE_ONLY_(code...) +#define push_hbyte0 lsr #8 +#define pull_hbyte1 lsl #24 +#else +#define _BE_ONLY_(code...) +#define _LE_ONLY_(code...) code +#define push_hbyte0 lsl #24 +#define pull_hbyte1 lsr #8 +#endif + +.Linsw_noalign: stmfd sp!, {r4, lr} + ldrccb ip, [r1, #-1]! + bcc 1f + + ldrh ip, [r0] + sub r2, r2, #1 + _BE_ONLY_( mov ip, ip, ror #8 ) + strb ip, [r1], #1 + _LE_ONLY_( mov ip, ip, lsr #8 ) + _BE_ONLY_( mov ip, ip, lsr #24 ) + +1: subs r2, r2, #2 + bmi 3f + _BE_ONLY_( mov ip, ip, lsl #24 ) + +2: ldrh r3, [r0] + ldrh r4, [r0] + subs r2, r2, #2 + orr ip, ip, r3, lsl #8 + orr ip, ip, r4, push_hbyte0 + str ip, [r1], #4 + mov ip, r4, pull_hbyte1 + bpl 2b + + _BE_ONLY_( mov ip, ip, lsr #24 ) + +3: tst r2, #1 + strb ip, [r1], #1 + ldrneh ip, [r0] + _BE_ONLY_( movne ip, ip, ror #8 ) + strneb ip, [r1], #1 + _LE_ONLY_( movne ip, ip, lsr #8 ) + _BE_ONLY_( movne ip, ip, lsr #24 ) + strneb ip, [r1] + ldmfd sp!, {r4, pc} +ENDPROC(__raw_readsw) diff --git a/kernel/arch/arm/lib/io-writesb.S b/kernel/arch/arm/lib/io-writesb.S new file mode 100644 index 000000000..a46bbc9b1 --- /dev/null +++ b/kernel/arch/arm/lib/io-writesb.S @@ -0,0 +1,94 @@ +/* + * linux/arch/arm/lib/io-writesb.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + + .macro outword, rd +#ifndef __ARMEB__ + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] +#else + mov lr, \rd, lsr #24 + strb lr, [r0] + mov lr, \rd, lsr #16 + strb lr, [r0] + mov lr, \rd, lsr #8 + strb lr, [r0] + strb \rd, [r0] +#endif + .endm + +.Loutsb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1], #1 + strgtb r3, [r0] + subs r2, r2, ip + bne .Loutsb_aligned + +ENTRY(__raw_writesb) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + ands ip, r1, #3 + bne .Loutsb_align + +.Loutsb_aligned: + stmfd sp!, {r4, r5, lr} + + subs r2, r2, #16 + bmi .Loutsb_no_16 + +.Loutsb_16_lp: ldmia r1!, {r3, r4, r5, ip} + outword r3 + outword r4 + outword r5 + outword ip + subs r2, r2, #16 + bpl .Loutsb_16_lp + + tst r2, #15 + ldmeqfd sp!, {r4, r5, pc} + +.Loutsb_no_16: tst r2, #8 + beq .Loutsb_no_8 + + ldmia r1!, {r3, r4} + outword r3 + outword r4 + +.Loutsb_no_8: tst r2, #4 + beq .Loutsb_no_4 + + ldr r3, [r1], #4 + outword r3 + +.Loutsb_no_4: ands r2, r2, #3 + ldmeqfd sp!, {r4, r5, pc} + + cmp r2, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1] + strgtb r3, [r0] + + ldmfd sp!, {r4, r5, pc} +ENDPROC(__raw_writesb) diff --git a/kernel/arch/arm/lib/io-writesl.S b/kernel/arch/arm/lib/io-writesl.S new file mode 100644 index 000000000..4ea243598 --- /dev/null +++ b/kernel/arch/arm/lib/io-writesl.S @@ -0,0 +1,67 @@ +/* + * linux/arch/arm/lib/io-writesl.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +ENTRY(__raw_writesl) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + ands ip, r1, #3 + bne 3f + + subs r2, r2, #4 + bmi 2f + stmfd sp!, {r4, lr} +1: ldmia r1!, {r3, r4, ip, lr} + subs r2, r2, #4 + str r3, [r0, #0] + str r4, [r0, #0] + str ip, [r0, #0] + str lr, [r0, #0] + bpl 1b + ldmfd sp!, {r4, lr} +2: movs r2, r2, lsl #31 + ldmcsia r1!, {r3, ip} + strcs r3, [r0, #0] + ldrne r3, [r1, #0] + strcs ip, [r0, #0] + strne r3, [r0, #0] + ret lr + +3: bic r1, r1, #3 + ldr r3, [r1], #4 + cmp ip, #2 + blt 5f + bgt 6f + +4: mov ip, r3, lspull #16 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, lspush #16 + str ip, [r0] + bne 4b + ret lr + +5: mov ip, r3, lspull #8 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, lspush #24 + str ip, [r0] + bne 5b + ret lr + +6: mov ip, r3, lspull #24 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, lspush #8 + str ip, [r0] + bne 6b + ret lr +ENDPROC(__raw_writesl) diff --git a/kernel/arch/arm/lib/io-writesw-armv3.S b/kernel/arch/arm/lib/io-writesw-armv3.S new file mode 100644 index 000000000..121789eb6 --- /dev/null +++ b/kernel/arch/arm/lib/io-writesw-armv3.S @@ -0,0 +1,126 @@ +/* + * linux/arch/arm/lib/io-writesw-armv3.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + +.Loutsw_bad_alignment: + adr r0, .Loutsw_bad_align_msg + mov r2, lr + b panic +.Loutsw_bad_align_msg: + .asciz "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.Loutsw_align: tst r1, #1 + bne .Loutsw_bad_alignment + + add r1, r1, #2 + + ldr r3, [r1, #-4] + mov r3, r3, lsr #16 + orr r3, r3, r3, lsl #16 + str r3, [r0] + subs r2, r2, #1 + reteq lr + +ENTRY(__raw_writesw) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + tst r1, #3 + bne .Loutsw_align + + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .Lno_outsw_8 + +.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, r6} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r5, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r5, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r6, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r6, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + subs r2, r2, #8 + bpl .Loutsw_8_lp + + tst r2, #7 + ldmeqfd sp!, {r4, r5, r6, pc} + +.Lno_outsw_8: tst r2, #4 + beq .Lno_outsw_4 + + ldmia r1!, {r3, r4} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.Lno_outsw_4: tst r2, #2 + beq .Lno_outsw_2 + + ldr r3, [r1], #4 + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.Lno_outsw_2: tst r2, #1 + + ldrne r3, [r1] + + movne ip, r3, lsl #16 + orrne ip, ip, ip, lsr #16 + strne ip, [r0] + + ldmfd sp!, {r4, r5, r6, pc} diff --git a/kernel/arch/arm/lib/io-writesw-armv4.S b/kernel/arch/arm/lib/io-writesw-armv4.S new file mode 100644 index 000000000..269f90c51 --- /dev/null +++ b/kernel/arch/arm/lib/io-writesw-armv4.S @@ -0,0 +1,100 @@ +/* + * linux/arch/arm/lib/io-writesw-armv4.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include + + .macro outword, rd +#ifndef __ARMEB__ + strh \rd, [r0] + mov \rd, \rd, lsr #16 + strh \rd, [r0] +#else + mov lr, \rd, lsr #16 + strh lr, [r0] + strh \rd, [r0] +#endif + .endm + +.Loutsw_align: movs ip, r1, lsl #31 + bne .Loutsw_noalign + + ldrh r3, [r1], #2 + sub r2, r2, #1 + strh r3, [r0] + +ENTRY(__raw_writesw) + teq r2, #0 + reteq lr + ands r3, r1, #3 + bne .Loutsw_align + + stmfd sp!, {r4, r5, lr} + + subs r2, r2, #8 + bmi .Lno_outsw_8 + +.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, ip} + subs r2, r2, #8 + outword r3 + outword r4 + outword r5 + outword ip + bpl .Loutsw_8_lp + +.Lno_outsw_8: tst r2, #4 + beq .Lno_outsw_4 + + ldmia r1!, {r3, ip} + outword r3 + outword ip + +.Lno_outsw_4: movs r2, r2, lsl #31 + bcc .Lno_outsw_2 + + ldr r3, [r1], #4 + outword r3 + +.Lno_outsw_2: ldrneh r3, [r1] + strneh r3, [r0] + + ldmfd sp!, {r4, r5, pc} + +#ifdef __ARMEB__ +#define pull_hbyte0 lsl #8 +#define push_hbyte1 lsr #24 +#else +#define pull_hbyte0 lsr #24 +#define push_hbyte1 lsl #8 +#endif + +.Loutsw_noalign: + ARM( ldr r3, [r1, -r3]! ) + THUMB( rsb r3, r3, #0 ) + THUMB( ldr r3, [r1, r3] ) + THUMB( sub r1, r3 ) + subcs r2, r2, #1 + bcs 2f + subs r2, r2, #2 + bmi 3f + +1: mov ip, r3, lsr #8 + strh ip, [r0] +2: mov ip, r3, pull_hbyte0 + ldr r3, [r1, #4]! + subs r2, r2, #2 + orr ip, ip, r3, push_hbyte1 + strh ip, [r0] + bpl 1b + + tst r2, #1 +3: movne ip, r3, lsr #8 + strneh ip, [r0] + ret lr +ENDPROC(__raw_writesw) diff --git a/kernel/arch/arm/lib/lib1funcs.S b/kernel/arch/arm/lib/lib1funcs.S new file mode 100644 index 000000000..947567ff6 --- /dev/null +++ b/kernel/arch/arm/lib/lib1funcs.S @@ -0,0 +1,363 @@ +/* + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines + * + * Author: Nicolas Pitre + * - contributed to gcc-3.4 on Sep 30, 2003 + * - adapted for the Linux kernel on Oct 2, 2003 + */ + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + + +#include +#include +#include + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif + + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +.endm + + +.macro ARM_DIV2_ORDER divisor, order + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm + + +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif + + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/substractions are left. +2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + +ENTRY(__udivsi3) +ENTRY(__aeabi_uidiv) +UNWIND(.fnstart) + + subs r2, r1, #1 + reteq lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + ret lr + +11: moveq r0, #1 + movne r0, #0 + ret lr + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + ret lr + +UNWIND(.fnend) +ENDPROC(__udivsi3) +ENDPROC(__aeabi_uidiv) + +ENTRY(__umodsi3) +UNWIND(.fnstart) + + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + retls lr + + ARM_MOD_BODY r0, r1, r2, r3 + + ret lr + +UNWIND(.fnend) +ENDPROC(__umodsi3) + +ENTRY(__divsi3) +ENTRY(__aeabi_idiv) +UNWIND(.fnstart) + + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + rsbmi r0, r0, #0 + ret lr + +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + ret lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + ret lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + ret lr + +UNWIND(.fnend) +ENDPROC(__divsi3) +ENDPROC(__aeabi_idiv) + +ENTRY(__modsi3) +UNWIND(.fnstart) + + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + ret lr + +UNWIND(.fnend) +ENDPROC(__modsi3) + +#ifdef CONFIG_AEABI + +ENTRY(__aeabi_uidivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + ret lr + +UNWIND(.fnend) +ENDPROC(__aeabi_uidivmod) + +ENTRY(__aeabi_idivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + ret lr + +UNWIND(.fnend) +ENDPROC(__aeabi_idivmod) + +#endif + +Ldiv0: +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #8 +UNWIND(.fnend) +ENDPROC(Ldiv0) diff --git a/kernel/arch/arm/lib/lshrdi3.S b/kernel/arch/arm/lib/lshrdi3.S new file mode 100644 index 000000000..922dcd88b --- /dev/null +++ b/kernel/arch/arm/lib/lshrdi3.S @@ -0,0 +1,54 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + + +#include +#include + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +ENTRY(__lshrdi3) +ENTRY(__aeabi_llsr) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, lsr r3 + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) + mov ah, ah, lsr r2 + ret lr + +ENDPROC(__lshrdi3) +ENDPROC(__aeabi_llsr) diff --git a/kernel/arch/arm/lib/memchr.S b/kernel/arch/arm/lib/memchr.S new file mode 100644 index 000000000..74a5bed6d --- /dev/null +++ b/kernel/arch/arm/lib/memchr.S @@ -0,0 +1,26 @@ +/* + * linux/arch/arm/lib/memchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include +#include + + .text + .align 5 +ENTRY(memchr) +1: subs r2, r2, #1 + bmi 2f + ldrb r3, [r0], #1 + teq r3, r1 + bne 1b + sub r0, r0, #1 +2: movne r0, #0 + ret lr +ENDPROC(memchr) diff --git a/kernel/arch/arm/lib/memcpy.S b/kernel/arch/arm/lib/memcpy.S new file mode 100644 index 000000000..7797e81e4 --- /dev/null +++ b/kernel/arch/arm/lib/memcpy.S @@ -0,0 +1,68 @@ +/* + * linux/arch/arm/lib/memcpy.S + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#define LDR1W_SHIFT 0 +#define STR1W_SHIFT 0 + + .macro ldr1w ptr reg abort + W(ldr) \reg, [\ptr], #4 + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro ldr1b ptr reg cond=al abort + ldr\cond\()b \reg, [\ptr], #1 + .endm + + .macro str1w ptr reg abort + W(str) \reg, [\ptr], #4 + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro str1b ptr reg cond=al abort + str\cond\()b \reg, [\ptr], #1 + .endm + + .macro enter reg1 reg2 + stmdb sp!, {r0, \reg1, \reg2} + .endm + + .macro usave reg1 reg2 + UNWIND( .save {r0, \reg1, \reg2} ) + .endm + + .macro exit reg1 reg2 + ldmfd sp!, {r0, \reg1, \reg2} + .endm + + .text + +/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ + +ENTRY(memcpy) + +#include "copy_template.S" + +ENDPROC(memcpy) diff --git a/kernel/arch/arm/lib/memmove.S b/kernel/arch/arm/lib/memmove.S new file mode 100644 index 000000000..69a9d47fc --- /dev/null +++ b/kernel/arch/arm/lib/memmove.S @@ -0,0 +1,227 @@ +/* + * linux/arch/arm/lib/memmove.S + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: (C) MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + + .text + +/* + * Prototype: void *memmove(void *dest, const void *src, size_t n); + * + * Note: + * + * If the memory regions don't overlap, we simply branch to memcpy which is + * normally a bit faster. Otherwise the copy is done going downwards. This + * is a transposition of the code from copy_template.S but with the copy + * occurring in the opposite direction. + */ + +ENTRY(memmove) + UNWIND( .fnstart ) + + subs ip, r0, r1 + cmphi r2, ip + bls memcpy + + stmfd sp!, {r0, r4, lr} + UNWIND( .fnend ) + + UNWIND( .fnstart ) + UNWIND( .save {r0, r4, lr} ) @ in first stmfd block + add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #-4] ) + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + UNWIND( .fnend ) + + UNWIND( .fnstart ) + UNWIND( .save {r0, r4, lr} ) + UNWIND( .save {r5 - r8} ) @ in second stmfd block + blt 5f + + CALGN( ands ip, r0, #31 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, ip ) @ C is set here + CALGN( rsb ip, ip, #32 ) + CALGN( add pc, r4, ip ) + + PLD( pld [r1, #-4] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 4f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) + +3: PLD( pld [r1, #-128] ) +4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} + subs r2, r2, #32 + stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) + +5: ands ip, r2, #28 + rsb ip, ip, #32 + addne pc, pc, ip @ C is always clear here + b 7f +6: W(nop) + W(ldr) r3, [r1, #-4]! + W(ldr) r4, [r1, #-4]! + W(ldr) r5, [r1, #-4]! + W(ldr) r6, [r1, #-4]! + W(ldr) r7, [r1, #-4]! + W(ldr) r8, [r1, #-4]! + W(ldr) lr, [r1, #-4]! + + add pc, pc, ip + nop + W(nop) + W(str) r3, [r0, #-4]! + W(str) r4, [r0, #-4]! + W(str) r5, [r0, #-4]! + W(str) r6, [r0, #-4]! + W(str) r7, [r0, #-4]! + W(str) r8, [r0, #-4]! + W(str) lr, [r0, #-4]! + + CALGN( bcs 2b ) + +7: ldmfd sp!, {r5 - r8} + UNWIND( .fnend ) @ end of second stmfd block + + UNWIND( .fnstart ) + UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block + +8: movs r2, r2, lsl #31 + ldrneb r3, [r1, #-1]! + ldrcsb r4, [r1, #-1]! + ldrcsb ip, [r1, #-1] + strneb r3, [r0, #-1]! + strcsb r4, [r0, #-1]! + strcsb ip, [r0, #-1] + ldmfd sp!, {r0, r4, pc} + +9: cmp ip, #2 + ldrgtb r3, [r1, #-1]! + ldrgeb r4, [r1, #-1]! + ldrb lr, [r1, #-1]! + strgtb r3, [r0, #-1]! + strgeb r4, [r0, #-1]! + subs r2, r2, ip + strb lr, [r0, #-1]! + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr r3, [r1, #0] + beq 17f + blt 18f + UNWIND( .fnend ) + + + .macro backward_copy_shift push pull + + UNWIND( .fnstart ) + UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block + subs r2, r2, #28 + blt 14f + + CALGN( ands ip, r0, #31 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) + +11: stmfd sp!, {r5 - r9} + UNWIND( .fnend ) + + UNWIND( .fnstart ) + UNWIND( .save {r0, r4, lr} ) + UNWIND( .save {r5 - r9} ) @ in new second stmfd block + + PLD( pld [r1, #-4] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 13f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) + +12: PLD( pld [r1, #-128] ) +13: ldmdb r1!, {r7, r8, r9, ip} + mov lr, r3, lspush #\push + subs r2, r2, #32 + ldmdb r1!, {r3, r4, r5, r6} + orr lr, lr, ip, lspull #\pull + mov ip, ip, lspush #\push + orr ip, ip, r9, lspull #\pull + mov r9, r9, lspush #\push + orr r9, r9, r8, lspull #\pull + mov r8, r8, lspush #\push + orr r8, r8, r7, lspull #\pull + mov r7, r7, lspush #\push + orr r7, r7, r6, lspull #\pull + mov r6, r6, lspush #\push + orr r6, r6, r5, lspull #\pull + mov r5, r5, lspush #\push + orr r5, r5, r4, lspull #\pull + mov r4, r4, lspush #\push + orr r4, r4, r3, lspull #\pull + stmdb r0!, {r4 - r9, ip, lr} + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) + + ldmfd sp!, {r5 - r9} + UNWIND( .fnend ) @ end of the second stmfd block + + UNWIND( .fnstart ) + UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block + +14: ands ip, r2, #28 + beq 16f + +15: mov lr, r3, lspush #\push + ldr r3, [r1, #-4]! + subs ip, ip, #4 + orr lr, lr, r3, lspull #\pull + str lr, [r0, #-4]! + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) + +16: add r1, r1, #(\pull / 8) + b 8b + UNWIND( .fnend ) + + .endm + + + backward_copy_shift push=8 pull=24 + +17: backward_copy_shift push=16 pull=16 + +18: backward_copy_shift push=24 pull=8 + +ENDPROC(memmove) diff --git a/kernel/arch/arm/lib/memset.S b/kernel/arch/arm/lib/memset.S new file mode 100644 index 000000000..a4ee97b5a --- /dev/null +++ b/kernel/arch/arm/lib/memset.S @@ -0,0 +1,135 @@ +/* + * linux/arch/arm/lib/memset.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include +#include +#include + + .text + .align 5 + +ENTRY(memset) +UNWIND( .fnstart ) + ands r3, r0, #3 @ 1 unaligned? + mov ip, r0 @ preserve r0 as return value + bne 6f @ 1 +/* + * we know that the pointer in ip is aligned to a word boundary. + */ +1: orr r1, r1, r1, lsl #8 + orr r1, r1, r1, lsl #16 + mov r3, r1 + cmp r2, #16 + blt 4f + +#if ! CALGN(1)+0 + +/* + * We need 2 extra registers for this loop - use r8 and the LR + */ + stmfd sp!, {r8, lr} +UNWIND( .fnend ) +UNWIND( .fnstart ) +UNWIND( .save {r8, lr} ) + mov r8, r1 + mov lr, r1 + +2: subs r2, r2, #64 + stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} + stmgeia ip!, {r1, r3, r8, lr} + bgt 2b + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. +/* + * No need to correct the count; we're only testing bits from now on + */ + tst r2, #32 + stmneia ip!, {r1, r3, r8, lr} + stmneia ip!, {r1, r3, r8, lr} + tst r2, #16 + stmneia ip!, {r1, r3, r8, lr} + ldmfd sp!, {r8, lr} +UNWIND( .fnend ) + +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. + */ + + stmfd sp!, {r4-r8, lr} +UNWIND( .fnend ) +UNWIND( .fnstart ) +UNWIND( .save {r4-r8, lr} ) + mov r4, r1 + mov r5, r1 + mov r6, r1 + mov r7, r1 + mov r8, r1 + mov lr, r1 + + cmp r2, #96 + tstgt ip, #31 + ble 3f + + and r8, ip, #31 + rsb r8, r8, #32 + sub r2, r2, r8 + movs r8, r8, lsl #(32 - 4) + stmcsia ip!, {r4, r5, r6, r7} + stmmiia ip!, {r4, r5} + tst r8, #(1 << 30) + mov r8, r1 + strne r1, [ip], #4 + +3: subs r2, r2, #64 + stmgeia ip!, {r1, r3-r8, lr} + stmgeia ip!, {r1, r3-r8, lr} + bgt 3b + ldmeqfd sp!, {r4-r8, pc} + + tst r2, #32 + stmneia ip!, {r1, r3-r8, lr} + tst r2, #16 + stmneia ip!, {r4-r7} + ldmfd sp!, {r4-r8, lr} +UNWIND( .fnend ) + +#endif + +UNWIND( .fnstart ) +4: tst r2, #8 + stmneia ip!, {r1, r3} + tst r2, #4 + strne r1, [ip], #4 +/* + * When we get here, we've got less than 4 bytes to zero. We + * may have an unaligned pointer as well. + */ +5: tst r2, #2 + strneb r1, [ip], #1 + strneb r1, [ip], #1 + tst r2, #1 + strneb r1, [ip], #1 + ret lr + +6: subs r2, r2, #4 @ 1 do we have enough + blt 5b @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r1, [ip], #1 @ 1 + strleb r1, [ip], #1 @ 1 + strb r1, [ip], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) + b 1b +UNWIND( .fnend ) +ENDPROC(memset) diff --git a/kernel/arch/arm/lib/memzero.S b/kernel/arch/arm/lib/memzero.S new file mode 100644 index 000000000..0eded952e --- /dev/null +++ b/kernel/arch/arm/lib/memzero.S @@ -0,0 +1,137 @@ +/* + * linux/arch/arm/lib/memzero.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + + .text + .align 5 + .word 0 +/* + * Align the pointer in r0. r3 contains the number of bytes that we are + * mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we + * don't bother; we use byte stores instead. + */ +UNWIND( .fnstart ) +1: subs r1, r1, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r2, [r0], #1 @ 1 + strleb r2, [r0], #1 @ 1 + strb r2, [r0], #1 @ 1 + add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) +/* + * The pointer is now aligned and the length is adjusted. Try doing the + * memzero again. + */ + +ENTRY(__memzero) + mov r2, #0 @ 1 + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 +/* + * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary. + */ + cmp r1, #16 @ 1 we can skip this chunk if we + blt 4f @ 1 have < 16 bytes + +#if ! CALGN(1)+0 + +/* + * We need an extra register for this loop - save the return address and + * use the LR + */ + str lr, [sp, #-4]! @ 1 +UNWIND( .fnend ) +UNWIND( .fnstart ) +UNWIND( .save {lr} ) + mov ip, r2 @ 1 + mov lr, r2 @ 1 + +3: subs r1, r1, #64 @ 1 write 32 bytes out per loop + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + bgt 3b @ 1 + ldmeqfd sp!, {pc} @ 1/2 quick exit +/* + * No need to correct the count; we're only testing bits from now on + */ + tst r1, #32 @ 1 + stmneia r0!, {r2, r3, ip, lr} @ 4 + stmneia r0!, {r2, r3, ip, lr} @ 4 + tst r1, #16 @ 1 16 bytes or more? + stmneia r0!, {r2, r3, ip, lr} @ 4 + ldr lr, [sp], #4 @ 1 +UNWIND( .fnend ) + +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. + */ + + stmfd sp!, {r4-r7, lr} +UNWIND( .fnend ) +UNWIND( .fnstart ) +UNWIND( .save {r4-r7, lr} ) + mov r4, r2 + mov r5, r2 + mov r6, r2 + mov r7, r2 + mov ip, r2 + mov lr, r2 + + cmp r1, #96 + andgts ip, r0, #31 + ble 3f + + rsb ip, ip, #32 + sub r1, r1, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + movs ip, ip, lsl #2 + strcs r2, [r0], #4 + +3: subs r1, r1, #64 + stmgeia r0!, {r2-r7, ip, lr} + stmgeia r0!, {r2-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} + + tst r1, #32 + stmneia r0!, {r2-r7, ip, lr} + tst r1, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} +UNWIND( .fnend ) + +#endif + +UNWIND( .fnstart ) +4: tst r1, #8 @ 1 8 bytes or more? + stmneia r0!, {r2, r3} @ 2 + tst r1, #4 @ 1 4 bytes or more? + strne r2, [r0], #4 @ 1 +/* + * When we get here, we've got less than 4 bytes to zero. We + * may have an unaligned pointer as well. + */ +5: tst r1, #2 @ 1 2 bytes or more? + strneb r2, [r0], #1 @ 1 + strneb r2, [r0], #1 @ 1 + tst r1, #1 @ 1 a byte left over + strneb r2, [r0], #1 @ 1 + ret lr @ 1 +UNWIND( .fnend ) +ENDPROC(__memzero) diff --git a/kernel/arch/arm/lib/muldi3.S b/kernel/arch/arm/lib/muldi3.S new file mode 100644 index 000000000..204305956 --- /dev/null +++ b/kernel/arch/arm/lib/muldi3.S @@ -0,0 +1,48 @@ +/* + * linux/arch/arm/lib/muldi3.S + * + * Author: Nicolas Pitre + * Created: Oct 19, 2005 + * Copyright: Monta Vista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +ENTRY(__muldi3) +ENTRY(__aeabi_lmul) + + mul xh, yl, xh + mla xh, xl, yh, xh + mov ip, xl, lsr #16 + mov yh, yl, lsr #16 + bic xl, xl, ip, lsl #16 + bic yl, yl, yh, lsl #16 + mla xh, yh, ip, xh + mul yh, xl, yh + mul xl, yl, xl + mul ip, yl, ip + adds xl, xl, yh, lsl #16 + adc xh, xh, yh, lsr #16 + adds xl, xl, ip, lsl #16 + adc xh, xh, ip, lsr #16 + ret lr + +ENDPROC(__muldi3) +ENDPROC(__aeabi_lmul) diff --git a/kernel/arch/arm/lib/putuser.S b/kernel/arch/arm/lib/putuser.S new file mode 100644 index 000000000..38d660d37 --- /dev/null +++ b/kernel/arch/arm/lib/putuser.S @@ -0,0 +1,98 @@ +/* + * linux/arch/arm/lib/putuser.S + * + * Copyright (C) 2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Idea from x86 version, (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface to make + * them more efficient, especially as they return an error + * value in addition to the "real" return value. + * + * __put_user_X + * + * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved + * r2, r3 contains the value + * Outputs: r0 is the error code + * lr corrupted + * + * No other registers must be altered. (see + * for specific ASM register usage). + * + * Note that ADDR_LIMIT is either 0 or 0xc0000000 + * Note also that it is intended that __put_user_bad is not global. + */ +#include +#include +#include +#include + +ENTRY(__put_user_1) + check_uaccess r0, 1, r1, ip, __put_user_bad +1: TUSER(strb) r2, [r0] + mov r0, #0 + ret lr +ENDPROC(__put_user_1) + +ENTRY(__put_user_2) + check_uaccess r0, 2, r1, ip, __put_user_bad + mov ip, r2, lsr #8 +#ifdef CONFIG_THUMB2_KERNEL +#ifndef __ARMEB__ +2: TUSER(strb) r2, [r0] +3: TUSER(strb) ip, [r0, #1] +#else +2: TUSER(strb) ip, [r0] +3: TUSER(strb) r2, [r0, #1] +#endif +#else /* !CONFIG_THUMB2_KERNEL */ +#ifndef __ARMEB__ +2: TUSER(strb) r2, [r0], #1 +3: TUSER(strb) ip, [r0] +#else +2: TUSER(strb) ip, [r0], #1 +3: TUSER(strb) r2, [r0] +#endif +#endif /* CONFIG_THUMB2_KERNEL */ + mov r0, #0 + ret lr +ENDPROC(__put_user_2) + +ENTRY(__put_user_4) + check_uaccess r0, 4, r1, ip, __put_user_bad +4: TUSER(str) r2, [r0] + mov r0, #0 + ret lr +ENDPROC(__put_user_4) + +ENTRY(__put_user_8) + check_uaccess r0, 8, r1, ip, __put_user_bad +#ifdef CONFIG_THUMB2_KERNEL +5: TUSER(str) r2, [r0] +6: TUSER(str) r3, [r0, #4] +#else +5: TUSER(str) r2, [r0], #4 +6: TUSER(str) r3, [r0] +#endif + mov r0, #0 + ret lr +ENDPROC(__put_user_8) + +__put_user_bad: + mov r0, #-EFAULT + ret lr +ENDPROC(__put_user_bad) + +.pushsection __ex_table, "a" + .long 1b, __put_user_bad + .long 2b, __put_user_bad + .long 3b, __put_user_bad + .long 4b, __put_user_bad + .long 5b, __put_user_bad + .long 6b, __put_user_bad +.popsection diff --git a/kernel/arch/arm/lib/setbit.S b/kernel/arch/arm/lib/setbit.S new file mode 100644 index 000000000..618fedae4 --- /dev/null +++ b/kernel/arch/arm/lib/setbit.S @@ -0,0 +1,15 @@ +/* + * linux/arch/arm/lib/setbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include "bitops.h" + .text + +bitop _set_bit, orr diff --git a/kernel/arch/arm/lib/strchr.S b/kernel/arch/arm/lib/strchr.S new file mode 100644 index 000000000..013d64c71 --- /dev/null +++ b/kernel/arch/arm/lib/strchr.S @@ -0,0 +1,27 @@ +/* + * linux/arch/arm/lib/strchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include +#include + + .text + .align 5 +ENTRY(strchr) + and r1, r1, #0xff +1: ldrb r2, [r0], #1 + teq r2, r1 + teqne r2, #0 + bne 1b + teq r2, r1 + movne r0, #0 + subeq r0, r0, #1 + ret lr +ENDPROC(strchr) diff --git a/kernel/arch/arm/lib/strrchr.S b/kernel/arch/arm/lib/strrchr.S new file mode 100644 index 000000000..3cec1c748 --- /dev/null +++ b/kernel/arch/arm/lib/strrchr.S @@ -0,0 +1,26 @@ +/* + * linux/arch/arm/lib/strrchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include +#include + + .text + .align 5 +ENTRY(strrchr) + mov r3, #0 +1: ldrb r2, [r0], #1 + teq r2, r1 + subeq r3, r0, #1 + teq r2, #0 + bne 1b + mov r0, r3 + ret lr +ENDPROC(strrchr) diff --git a/kernel/arch/arm/lib/testchangebit.S b/kernel/arch/arm/lib/testchangebit.S new file mode 100644 index 000000000..4becdc3a5 --- /dev/null +++ b/kernel/arch/arm/lib/testchangebit.S @@ -0,0 +1,15 @@ +/* + * linux/arch/arm/lib/testchangebit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include "bitops.h" + .text + +testop _test_and_change_bit, eor, str diff --git a/kernel/arch/arm/lib/testclearbit.S b/kernel/arch/arm/lib/testclearbit.S new file mode 100644 index 000000000..918841dcc --- /dev/null +++ b/kernel/arch/arm/lib/testclearbit.S @@ -0,0 +1,15 @@ +/* + * linux/arch/arm/lib/testclearbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include "bitops.h" + .text + +testop _test_and_clear_bit, bicne, strne diff --git a/kernel/arch/arm/lib/testsetbit.S b/kernel/arch/arm/lib/testsetbit.S new file mode 100644 index 000000000..8d1b2fe9e --- /dev/null +++ b/kernel/arch/arm/lib/testsetbit.S @@ -0,0 +1,15 @@ +/* + * linux/arch/arm/lib/testsetbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include "bitops.h" + .text + +testop _test_and_set_bit, orreq, streq diff --git a/kernel/arch/arm/lib/uaccess_with_memcpy.c b/kernel/arch/arm/lib/uaccess_with_memcpy.c new file mode 100644 index 000000000..3e58d7100 --- /dev/null +++ b/kernel/arch/arm/lib/uaccess_with_memcpy.c @@ -0,0 +1,270 @@ +/* + * linux/arch/arm/lib/uaccess_with_memcpy.c + * + * Written by: Lennert Buytenhek and Nicolas Pitre + * Copyright (C) 2009 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include /* for in_atomic() */ +#include +#include +#include +#include +#include + +static int +pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) +{ + unsigned long addr = (unsigned long)_addr; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + pud_t *pud; + spinlock_t *ptl; + + pgd = pgd_offset(current->mm, addr); + if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd))) + return 0; + + pud = pud_offset(pgd, addr); + if (unlikely(pud_none(*pud) || pud_bad(*pud))) + return 0; + + pmd = pmd_offset(pud, addr); + if (unlikely(pmd_none(*pmd))) + return 0; + + /* + * A pmd can be bad if it refers to a HugeTLB or THP page. + * + * Both THP and HugeTLB pages have the same pmd layout + * and should not be manipulated by the pte functions. + * + * Lock the page table for the destination and check + * to see that it's still huge and whether or not we will + * need to fault on write, or if we have a splitting THP. + */ + if (unlikely(pmd_thp_or_huge(*pmd))) { + ptl = ¤t->mm->page_table_lock; + spin_lock(ptl); + if (unlikely(!pmd_thp_or_huge(*pmd) + || pmd_hugewillfault(*pmd) + || pmd_trans_splitting(*pmd))) { + spin_unlock(ptl); + return 0; + } + + *ptep = NULL; + *ptlp = ptl; + return 1; + } + + if (unlikely(pmd_bad(*pmd))) + return 0; + + pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); + if (unlikely(!pte_present(*pte) || !pte_young(*pte) || + !pte_write(*pte) || !pte_dirty(*pte))) { + pte_unmap_unlock(pte, ptl); + return 0; + } + + *ptep = pte; + *ptlp = ptl; + + return 1; +} + +static unsigned long noinline +__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) +{ + int atomic; + + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { + memcpy((void *)to, from, n); + return 0; + } + + /* the mmap semaphore is taken only if not in an atomic context */ + atomic = in_atomic(); + + if (!atomic) + down_read(¤t->mm->mmap_sem); + while (n) { + pte_t *pte; + spinlock_t *ptl; + int tocopy; + + while (!pin_page_for_write(to, &pte, &ptl)) { + if (!atomic) + up_read(¤t->mm->mmap_sem); + if (__put_user(0, (char __user *)to)) + goto out; + if (!atomic) + down_read(¤t->mm->mmap_sem); + } + + tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1; + if (tocopy > n) + tocopy = n; + + memcpy((void *)to, from, tocopy); + to += tocopy; + from += tocopy; + n -= tocopy; + + if (pte) + pte_unmap_unlock(pte, ptl); + else + spin_unlock(ptl); + } + if (!atomic) + up_read(¤t->mm->mmap_sem); + +out: + return n; +} + +unsigned long +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + /* + * This test is stubbed out of the main function above to keep + * the overhead for small copies low by avoiding a large + * register dump on the stack just to reload them right away. + * With frame pointer disabled, tail call optimization kicks in + * as well making this test almost invisible. + */ + if (n < 64) + return __copy_to_user_std(to, from, n); + return __copy_to_user_memcpy(to, from, n); +} + +static unsigned long noinline +__clear_user_memset(void __user *addr, unsigned long n) +{ + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { + memset((void *)addr, 0, n); + return 0; + } + + down_read(¤t->mm->mmap_sem); + while (n) { + pte_t *pte; + spinlock_t *ptl; + int tocopy; + + while (!pin_page_for_write(addr, &pte, &ptl)) { + up_read(¤t->mm->mmap_sem); + if (__put_user(0, (char __user *)addr)) + goto out; + down_read(¤t->mm->mmap_sem); + } + + tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1; + if (tocopy > n) + tocopy = n; + + memset((void *)addr, 0, tocopy); + addr += tocopy; + n -= tocopy; + + if (pte) + pte_unmap_unlock(pte, ptl); + else + spin_unlock(ptl); + } + up_read(¤t->mm->mmap_sem); + +out: + return n; +} + +unsigned long __clear_user(void __user *addr, unsigned long n) +{ + /* See rational for this in __copy_to_user() above. */ + if (n < 64) + return __clear_user_std(addr, n); + return __clear_user_memset(addr, n); +} + +#if 0 + +/* + * This code is disabled by default, but kept around in case the chosen + * thresholds need to be revalidated. Some overhead (small but still) + * would be implied by a runtime determined variable threshold, and + * so far the measurement on concerned targets didn't show a worthwhile + * variation. + * + * Note that a fairly precise sched_clock() implementation is needed + * for results to make some sense. + */ + +#include + +static int __init test_size_treshold(void) +{ + struct page *src_page, *dst_page; + void *user_ptr, *kernel_ptr; + unsigned long long t0, t1, t2; + int size, ret; + + ret = -ENOMEM; + src_page = alloc_page(GFP_KERNEL); + if (!src_page) + goto no_src; + dst_page = alloc_page(GFP_KERNEL); + if (!dst_page) + goto no_dst; + kernel_ptr = page_address(src_page); + user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010)); + if (!user_ptr) + goto no_vmap; + + /* warm up the src page dcache */ + ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE); + + for (size = PAGE_SIZE; size >= 4; size /= 2) { + t0 = sched_clock(); + ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size); + t1 = sched_clock(); + ret |= __copy_to_user_std(user_ptr, kernel_ptr, size); + t2 = sched_clock(); + printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1); + } + + for (size = PAGE_SIZE; size >= 4; size /= 2) { + t0 = sched_clock(); + ret |= __clear_user_memset(user_ptr, size); + t1 = sched_clock(); + ret |= __clear_user_std(user_ptr, size); + t2 = sched_clock(); + printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1); + } + + if (ret) + ret = -EFAULT; + + vunmap(user_ptr); +no_vmap: + put_page(dst_page); +no_dst: + put_page(src_page); +no_src: + return ret; +} + +subsys_initcall(test_size_treshold); + +#endif diff --git a/kernel/arch/arm/lib/ucmpdi2.S b/kernel/arch/arm/lib/ucmpdi2.S new file mode 100644 index 000000000..ad4a63091 --- /dev/null +++ b/kernel/arch/arm/lib/ucmpdi2.S @@ -0,0 +1,53 @@ +/* + * linux/arch/arm/lib/ucmpdi2.S + * + * Author: Nicolas Pitre + * Created: Oct 19, 2005 + * Copyright: Monta Vista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +ENTRY(__ucmpdi2) + + cmp xh, yh + cmpeq xl, yl + movlo r0, #0 + moveq r0, #1 + movhi r0, #2 + ret lr + +ENDPROC(__ucmpdi2) + +#ifdef CONFIG_AEABI + +ENTRY(__aeabi_ulcmp) + + cmp xh, yh + cmpeq xl, yl + movlo r0, #-1 + moveq r0, #0 + movhi r0, #1 + ret lr + +ENDPROC(__aeabi_ulcmp) + +#endif + diff --git a/kernel/arch/arm/lib/xor-neon.c b/kernel/arch/arm/lib/xor-neon.c new file mode 100644 index 000000000..2c40aeab3 --- /dev/null +++ b/kernel/arch/arm/lib/xor-neon.c @@ -0,0 +1,46 @@ +/* + * linux/arch/arm/lib/xor-neon.c + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +MODULE_LICENSE("GPL"); + +#ifndef __ARM_NEON__ +#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon' +#endif + +/* + * Pull in the reference implementations while instructing GCC (through + * -ftree-vectorize) to attempt to exploit implicit parallelism and emit + * NEON instructions. + */ +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC optimize "tree-vectorize" +#else +/* + * While older versions of GCC do not generate incorrect code, they fail to + * recognize the parallel nature of these functions, and emit plain ARM code, + * which is known to be slower than the optimized ARM code in asm-arm/xor.h. + */ +#warning This code requires at least version 4.6 of GCC +#endif + +#pragma GCC diagnostic ignored "-Wunused-variable" +#include + +struct xor_block_template const xor_block_neon_inner = { + .name = "__inner_neon__", + .do_2 = xor_8regs_2, + .do_3 = xor_8regs_3, + .do_4 = xor_8regs_4, + .do_5 = xor_8regs_5, +}; +EXPORT_SYMBOL(xor_block_neon_inner); -- cgit 1.2.3-korg