diff options
Diffstat (limited to 'qemu/util/cutils.c')
-rw-r--r-- | qemu/util/cutils.c | 363 |
1 files changed, 317 insertions, 46 deletions
diff --git a/qemu/util/cutils.c b/qemu/util/cutils.c index 5d1c9ebe0..43d1afbbe 100644 --- a/qemu/util/cutils.c +++ b/qemu/util/cutils.c @@ -21,15 +21,15 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/host-utils.h" #include <math.h> -#include <limits.h> -#include <errno.h> #include "qemu/sockets.h" #include "qemu/iov.h" #include "net/net.h" +#include "qemu/cutils.h" void strpadcpy(char *buf, int buf_size, const char *str, char pad) { @@ -145,11 +145,6 @@ time_t mktimegm(struct tm *tm) return t; } -int qemu_fls(int i) -{ - return 32 - clz32(i); -} - /* * Make sure data goes on disk, but if possible do not bother to * write out the inode just for timestamp updates. @@ -166,6 +161,46 @@ int qemu_fdatasync(int fd) #endif } +/* vector definitions */ +#ifdef __ALTIVEC__ +#include <altivec.h> +/* The altivec.h header says we're allowed to undef these for + * C++ compatibility. Here we don't care about C++, but we + * undef them anyway to avoid namespace pollution. + */ +#undef vector +#undef pixel +#undef bool +#define VECTYPE __vector unsigned char +#define SPLAT(p) vec_splat(vec_ld(0, p), 0) +#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) +#define VEC_OR(v1, v2) ((v1) | (v2)) +/* altivec.h may redefine the bool macro as vector type. + * Reset it to POSIX semantics. */ +#define bool _Bool +#elif defined __SSE2__ +#include <emmintrin.h> +#define VECTYPE __m128i +#define SPLAT(p) _mm_set1_epi8(*(p)) +#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) +#define VEC_OR(v1, v2) (_mm_or_si128(v1, v2)) +#else +#define VECTYPE unsigned long +#define SPLAT(p) (*(p) * (~0UL / 255)) +#define ALL_EQ(v1, v2) ((v1) == (v2)) +#define VEC_OR(v1, v2) ((v1) | (v2)) +#endif + +#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8 + +static bool +can_use_buffer_find_nonzero_offset_inner(const void *buf, size_t len) +{ + return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR + * sizeof(VECTYPE)) == 0 + && ((uintptr_t) buf) % sizeof(VECTYPE) == 0); +} + /* * Searches for an area with non-zero content in a buffer * @@ -174,8 +209,8 @@ int qemu_fdatasync(int fd) * and addr must be a multiple of sizeof(VECTYPE) due to * restriction of optimizations in this function. * - * can_use_buffer_find_nonzero_offset() can be used to check - * these requirements. + * can_use_buffer_find_nonzero_offset_inner() can be used to + * check these requirements. * * The return value is the offset of the non-zero area rounded * down to a multiple of sizeof(VECTYPE) for the first @@ -186,13 +221,13 @@ int qemu_fdatasync(int fd) * If the buffer is all zero the return value is equal to len. */ -size_t buffer_find_nonzero_offset(const void *buf, size_t len) +static size_t buffer_find_nonzero_offset_inner(const void *buf, size_t len) { const VECTYPE *p = buf; const VECTYPE zero = (VECTYPE){0}; size_t i; - assert(can_use_buffer_find_nonzero_offset(buf, len)); + assert(can_use_buffer_find_nonzero_offset_inner(buf, len)); if (!len) { return 0; @@ -222,6 +257,114 @@ size_t buffer_find_nonzero_offset(const void *buf, size_t len) } /* + * GCC before version 4.9 has a bug which will cause the target + * attribute work incorrectly and failed to compile in some case, + * restrict the gcc version to 4.9+ to prevent the failure. + */ + +#if defined CONFIG_AVX2_OPT && QEMU_GNUC_PREREQ(4, 9) +#pragma GCC push_options +#pragma GCC target("avx2") +#include <cpuid.h> +#include <immintrin.h> + +#define AVX2_VECTYPE __m256i +#define AVX2_SPLAT(p) _mm256_set1_epi8(*(p)) +#define AVX2_ALL_EQ(v1, v2) \ + (_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)) == 0xFFFFFFFF) +#define AVX2_VEC_OR(v1, v2) (_mm256_or_si256(v1, v2)) + +static bool +can_use_buffer_find_nonzero_offset_avx2(const void *buf, size_t len) +{ + return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR + * sizeof(AVX2_VECTYPE)) == 0 + && ((uintptr_t) buf) % sizeof(AVX2_VECTYPE) == 0); +} + +static size_t buffer_find_nonzero_offset_avx2(const void *buf, size_t len) +{ + const AVX2_VECTYPE *p = buf; + const AVX2_VECTYPE zero = (AVX2_VECTYPE){0}; + size_t i; + + assert(can_use_buffer_find_nonzero_offset_avx2(buf, len)); + + if (!len) { + return 0; + } + + for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { + if (!AVX2_ALL_EQ(p[i], zero)) { + return i * sizeof(AVX2_VECTYPE); + } + } + + for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; + i < len / sizeof(AVX2_VECTYPE); + i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { + AVX2_VECTYPE tmp0 = AVX2_VEC_OR(p[i + 0], p[i + 1]); + AVX2_VECTYPE tmp1 = AVX2_VEC_OR(p[i + 2], p[i + 3]); + AVX2_VECTYPE tmp2 = AVX2_VEC_OR(p[i + 4], p[i + 5]); + AVX2_VECTYPE tmp3 = AVX2_VEC_OR(p[i + 6], p[i + 7]); + AVX2_VECTYPE tmp01 = AVX2_VEC_OR(tmp0, tmp1); + AVX2_VECTYPE tmp23 = AVX2_VEC_OR(tmp2, tmp3); + if (!AVX2_ALL_EQ(AVX2_VEC_OR(tmp01, tmp23), zero)) { + break; + } + } + + return i * sizeof(AVX2_VECTYPE); +} + +static bool avx2_support(void) +{ + int a, b, c, d; + + if (__get_cpuid_max(0, NULL) < 7) { + return false; + } + + __cpuid_count(7, 0, a, b, c, d); + + return b & bit_AVX2; +} + +bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) \ + __attribute__ ((ifunc("can_use_buffer_find_nonzero_offset_ifunc"))); +size_t buffer_find_nonzero_offset(const void *buf, size_t len) \ + __attribute__ ((ifunc("buffer_find_nonzero_offset_ifunc"))); + +static void *buffer_find_nonzero_offset_ifunc(void) +{ + typeof(buffer_find_nonzero_offset) *func = (avx2_support()) ? + buffer_find_nonzero_offset_avx2 : buffer_find_nonzero_offset_inner; + + return func; +} + +static void *can_use_buffer_find_nonzero_offset_ifunc(void) +{ + typeof(can_use_buffer_find_nonzero_offset) *func = (avx2_support()) ? + can_use_buffer_find_nonzero_offset_avx2 : + can_use_buffer_find_nonzero_offset_inner; + + return func; +} +#pragma GCC pop_options +#else +bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) +{ + return can_use_buffer_find_nonzero_offset_inner(buf, len); +} + +size_t buffer_find_nonzero_offset(const void *buf, size_t len) +{ + return buffer_find_nonzero_offset_inner(buf, len); +} +#endif + +/* * Checks if a buffer is all zeroes * * Attention! The len must be a multiple of 4 * sizeof(long) due to @@ -281,19 +424,19 @@ int fcntl_setfl(int fd, int flag) static int64_t suffix_mul(char suffix, int64_t unit) { switch (qemu_toupper(suffix)) { - case STRTOSZ_DEFSUFFIX_B: + case QEMU_STRTOSZ_DEFSUFFIX_B: return 1; - case STRTOSZ_DEFSUFFIX_KB: + case QEMU_STRTOSZ_DEFSUFFIX_KB: return unit; - case STRTOSZ_DEFSUFFIX_MB: + case QEMU_STRTOSZ_DEFSUFFIX_MB: return unit * unit; - case STRTOSZ_DEFSUFFIX_GB: + case QEMU_STRTOSZ_DEFSUFFIX_GB: return unit * unit * unit; - case STRTOSZ_DEFSUFFIX_TB: + case QEMU_STRTOSZ_DEFSUFFIX_TB: return unit * unit * unit * unit; - case STRTOSZ_DEFSUFFIX_PB: + case QEMU_STRTOSZ_DEFSUFFIX_PB: return unit * unit * unit * unit * unit; - case STRTOSZ_DEFSUFFIX_EB: + case QEMU_STRTOSZ_DEFSUFFIX_EB: return unit * unit * unit * unit * unit * unit; } return -1; @@ -305,7 +448,7 @@ static int64_t suffix_mul(char suffix, int64_t unit) * in *end, if not NULL. Return -ERANGE on overflow, Return -EINVAL on * other error. */ -int64_t strtosz_suffix_unit(const char *nptr, char **end, +int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end, const char default_suffix, int64_t unit) { int64_t retval = -EINVAL; @@ -348,14 +491,165 @@ fail: return retval; } -int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix) +int64_t qemu_strtosz_suffix(const char *nptr, char **end, + const char default_suffix) { - return strtosz_suffix_unit(nptr, end, default_suffix, 1024); + return qemu_strtosz_suffix_unit(nptr, end, default_suffix, 1024); } -int64_t strtosz(const char *nptr, char **end) +int64_t qemu_strtosz(const char *nptr, char **end) { - return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB); + return qemu_strtosz_suffix(nptr, end, QEMU_STRTOSZ_DEFSUFFIX_MB); +} + +/** + * Helper function for qemu_strto*l() functions. + */ +static int check_strtox_error(const char *p, char *endptr, const char **next, + int err) +{ + /* If no conversion was performed, prefer BSD behavior over glibc + * behavior. + */ + if (err == 0 && endptr == p) { + err = EINVAL; + } + if (!next && *endptr) { + return -EINVAL; + } + if (next) { + *next = endptr; + } + return -err; +} + +/** + * QEMU wrappers for strtol(), strtoll(), strtoul(), strotull() C functions. + * + * Convert ASCII string @nptr to a long integer value + * from the given @base. Parameters @nptr, @endptr, @base + * follows same semantics as strtol() C function. + * + * Unlike from strtol() function, if @endptr is not NULL, this + * function will return -EINVAL whenever it cannot fully convert + * the string in @nptr with given @base to a long. This function returns + * the result of the conversion only through the @result parameter. + * + * If NULL is passed in @endptr, then the whole string in @ntpr + * is a number otherwise it returns -EINVAL. + * + * RETURN VALUE + * Unlike from strtol() function, this wrapper returns either + * -EINVAL or the errno set by strtol() function (e.g -ERANGE). + * If the conversion overflows, -ERANGE is returned, and @result + * is set to the max value of the desired type + * (e.g. LONG_MAX, LLONG_MAX, ULONG_MAX, ULLONG_MAX). If the case + * of underflow, -ERANGE is returned, and @result is set to the min + * value of the desired type. For strtol(), strtoll(), @result is set to + * LONG_MIN, LLONG_MIN, respectively, and for strtoul(), strtoull() it + * is set to 0. + */ +int qemu_strtol(const char *nptr, const char **endptr, int base, + long *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtol(nptr, &p, base); + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; +} + +/** + * Converts ASCII string to an unsigned long integer. + * + * If string contains a negative number, value will be converted to + * the unsigned representation of the signed value, unless the original + * (nonnegated) value would overflow, in this case, it will set @result + * to ULONG_MAX, and return ERANGE. + * + * The same behavior holds, for qemu_strtoull() but sets @result to + * ULLONG_MAX instead of ULONG_MAX. + * + * See qemu_strtol() documentation for more info. + */ +int qemu_strtoul(const char *nptr, const char **endptr, int base, + unsigned long *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtoul(nptr, &p, base); + /* Windows returns 1 for negative out-of-range values. */ + if (errno == ERANGE) { + *result = -1; + } + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; +} + +/** + * Converts ASCII string to a long long integer. + * + * See qemu_strtol() documentation for more info. + */ +int qemu_strtoll(const char *nptr, const char **endptr, int base, + int64_t *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtoll(nptr, &p, base); + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; +} + +/** + * Converts ASCII string to an unsigned long long integer. + * + * See qemu_strtol() documentation for more info. + */ +int qemu_strtoull(const char *nptr, const char **endptr, int base, + uint64_t *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtoull(nptr, &p, base); + /* Windows returns 1 for negative out-of-range values. */ + if (errno == ERANGE) { + *result = -1; + } + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; } /** @@ -474,29 +768,6 @@ int qemu_parse_fd(const char *param) return fd; } -/* round down to the nearest power of 2*/ -int64_t pow2floor(int64_t value) -{ - if (!is_power_of_2(value)) { - value = 0x8000000000000000ULL >> clz64(value); - } - return value; -} - -/* round up to the nearest power of 2 (0 if overflow) */ -uint64_t pow2ceil(uint64_t value) -{ - uint8_t nlz = clz64(value); - - if (is_power_of_2(value)) { - return value; - } - if (!nlz) { - return 0; - } - return 1ULL << (64 - nlz); -} - /* * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128) * Input is limited to 14-bit numbers |