diff options
Diffstat (limited to 'qemu/roms/ipxe/src/arch/x86/include/bits/string.h')
-rw-r--r-- | qemu/roms/ipxe/src/arch/x86/include/bits/string.h | 140 |
1 files changed, 126 insertions, 14 deletions
diff --git a/qemu/roms/ipxe/src/arch/x86/include/bits/string.h b/qemu/roms/ipxe/src/arch/x86/include/bits/string.h index dce994983..c26fe30d5 100644 --- a/qemu/roms/ipxe/src/arch/x86/include/bits/string.h +++ b/qemu/roms/ipxe/src/arch/x86/include/bits/string.h @@ -18,9 +18,13 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. + * + * You can also choose to distribute this program under the terms of + * the Unmodified Binary Distribution Licence (as given in the file + * COPYING.UBDL), provided that you have satisfied its requirements. */ -FILE_LICENCE ( GPL2_OR_LATER ); +FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); /** @file * @@ -28,8 +32,6 @@ FILE_LICENCE ( GPL2_OR_LATER ); * */ -#define __HAVE_ARCH_MEMCPY - extern void * __memcpy ( void *dest, const void *src, size_t len ); extern void * __memcpy_reverse ( void *dest, const void *src, size_t len ); @@ -169,8 +171,6 @@ memcpy ( void *dest, const void *src, size_t len ) { } } -#define __HAVE_ARCH_MEMMOVE - extern void * __memmove ( void *dest, const void *src, size_t len ); /** @@ -196,8 +196,6 @@ memmove ( void *dest, const void *src, size_t len ) { } } -#define __HAVE_ARCH_MEMSET - /** * Fill memory region * @@ -206,7 +204,8 @@ memmove ( void *dest, const void *src, size_t len ) { * @v len Length * @ret dest Destination address */ -static inline void * memset ( void *dest, int fill, size_t len ) { +static inline __attribute__ (( always_inline )) void * +__memset ( void *dest, int fill, size_t len ) { void *discard_D; size_t discard_c; @@ -217,16 +216,129 @@ static inline void * memset ( void *dest, int fill, size_t len ) { return dest; } -#define __HAVE_ARCH_MEMSWAP +/** + * Fill memory region with zero (where length is a compile-time constant) + * + * @v dest Destination address + * @v len Length + * @ret dest Destination address + */ +static inline __attribute__ (( always_inline )) void * +__constant_memset_zero ( void *dest, size_t len ) { + union { + uint32_t u32[2]; + uint16_t u16[4]; + uint8_t u8[8]; + } __attribute__ (( __may_alias__ )) *dest_u = dest; + void *edi; + uint32_t eax; + + switch ( len ) { + case 0 : /* 0 bytes */ + return dest; + + /* Single-register moves. Almost certainly better than a + * string operation. We can avoid clobbering any registers, + * we can reuse a zero that happens to already be in a + * register, and we can optimise away the code entirely if the + * memset() is used to clear a region which then gets + * immediately overwritten. + */ + case 1 : /* 3 bytes */ + dest_u->u8[0] = 0; + return dest; + case 2: /* 5 bytes */ + dest_u->u16[0] = 0; + return dest; + case 4: /* 6 bytes */ + dest_u->u32[0] = 0; + return dest; + + /* Double-register moves. Very probably better than a string + * operation. + */ + case 3 : /* 9 bytes */ + dest_u->u16[0] = 0; + dest_u->u8[2] = 0; + return dest; + case 5 : /* 10 bytes */ + dest_u->u32[0] = 0; + dest_u->u8[4] = 0; + return dest; + case 6 : /* 12 bytes */ + dest_u->u32[0] = 0; + dest_u->u16[2] = 0; + return dest; + case 8 : /* 13 bytes */ + dest_u->u32[0] = 0; + dest_u->u32[1] = 0; + return dest; + } + + /* As with memcpy(), we can potentially save space by using + * multiple single-byte "stos" instructions instead of loading + * up ecx and using "rep stosb". + * + * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte + * to allow for saving/restoring ecx 50% of the time. + * + * "stosl" and "stosb" are 1 byte each, "stosw" is two bytes. + * + * The calculations are therefore the same as for memcpy(), + * giving a cutoff point of around 26 bytes. + */ -extern void * memswap ( void *dest, void *src, size_t len ); + edi = dest; + eax = 0; + + if ( len >= 26 ) + return __memset ( dest, 0, len ); -#define __HAVE_ARCH_STRNCMP + if ( len >= 6*4 ) + __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax ) + : "0" ( edi ), "1" ( eax ) : "memory" ); + if ( len >= 5*4 ) + __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax ) + : "0" ( edi ), "1" ( eax ) : "memory" ); + if ( len >= 4*4 ) + __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax ) + : "0" ( edi ), "1" ( eax ) : "memory" ); + if ( len >= 3*4 ) + __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax ) + : "0" ( edi ), "1" ( eax ) : "memory" ); + if ( len >= 2*4 ) + __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax ) + : "0" ( edi ), "1" ( eax ) : "memory" ); + if ( len >= 1*4 ) + __asm__ __volatile__ ( "stosl" : "=&D" ( edi ), "=&a" ( eax ) + : "0" ( edi ), "1" ( eax ) : "memory" ); + if ( ( len % 4 ) >= 2 ) + __asm__ __volatile__ ( "stosw" : "=&D" ( edi ), "=&a" ( eax ) + : "0" ( edi ), "1" ( eax ) : "memory" ); + if ( ( len % 2 ) >= 1 ) + __asm__ __volatile__ ( "stosb" : "=&D" ( edi ), "=&a" ( eax ) + : "0" ( edi ), "1" ( eax ) : "memory" ); -extern int strncmp ( const char *str1, const char *str2, size_t len ); + return dest; +} -#define __HAVE_ARCH_STRLEN +/** + * Fill memory region + * + * @v dest Destination address + * @v fill Fill pattern + * @v len Length + * @ret dest Destination address + */ +static inline __attribute__ (( always_inline )) void * +memset ( void *dest, int fill, size_t len ) { -extern size_t strlen ( const char *string ); + if ( __builtin_constant_p ( fill ) && ( fill == 0 ) && + __builtin_constant_p ( len ) ) { + return __constant_memset_zero ( dest, len ); + } else { + return __memset ( dest, fill, len ); + } +} #endif /* X86_BITS_STRING_H */ |