diff options
author | Ashlee Young <ashlee@onosfw.com> | 2015-09-09 22:21:41 -0700 |
---|---|---|
committer | Ashlee Young <ashlee@onosfw.com> | 2015-09-09 22:21:41 -0700 |
commit | 8879b125d26e8db1a5633de5a9c692eb2d1c4f83 (patch) | |
tree | c7259d85a991b83dfa85ab2e339360669fc1f58e /framework/src/suricata/src/util-memcmp.h | |
parent | 13d05bc8458758ee39cb829098241e89616717ee (diff) |
suricata checkin based on commit id a4bce14770beee46a537eda3c3f6e8e8565d5d0a
Change-Id: I9a214fa0ee95e58fc640e50bd604dac7f42db48f
Diffstat (limited to 'framework/src/suricata/src/util-memcmp.h')
-rw-r--r-- | framework/src/suricata/src/util-memcmp.h | 502 |
1 file changed, 502 insertions, 0 deletions
diff --git a/framework/src/suricata/src/util-memcmp.h b/framework/src/suricata/src/util-memcmp.h new file mode 100644 index 00000000..9248f842 --- /dev/null +++ b/framework/src/suricata/src/util-memcmp.h @@ -0,0 +1,502 @@ +/* Copyright (C) 2007-2013 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Victor Julien <victor@inliniac.net> + * + * Memcmp implementations for SSE3, SSE4.1, SSE4.2 and TILE-Gx SIMD. + * + * Both SCMemcmp and SCMemcmpLowercase return 0 on a exact match, + * 1 on a failed match. + */ + +#ifndef __UTIL_MEMCMP_H__ +#define __UTIL_MEMCMP_H__ + +#include "util-optimize.h" + +/** \brief compare two patterns, converting the 2nd to lowercase + * \warning *ONLY* the 2nd pattern is converted to lowercase + */ +static inline int SCMemcmpLowercase(const void *, const void *, size_t); + +void MemcmpRegisterTests(void); + +static inline int +MemcmpLowercase(const void *s1, const void *s2, size_t n) +{ + ssize_t i; + + /* check backwards because we already tested the first + * 2 to 4 chars. This way we are more likely to detect + * a miss and thus speed up a little... 
*/ + for (i = n - 1; i >= 0; i--) { + if (((uint8_t *)s1)[i] != u8_tolower(*(((uint8_t *)s2)+i))) + return 1; + } + + return 0; +} + +#if defined(__SSE4_2__) + +#include <nmmintrin.h> + +/* No SIMD support, fall back to plain memcmp and a home grown lowercase one */ + +static inline int SCMemcmp(const void *s1, const void *s2, size_t n) +{ + __m128i b1, b2; + + int r; + /* counter for how far we already matched in the buffer */ + size_t m = 0; + + do { + /* apparently we can't just read 16 bytes even though + * it almost always works fine :) */ + if (likely(n - m < 16)) { + return memcmp(s1, s2, n - m) ? 1 : 0; + } + + /* load the buffers into the 128bit vars */ + b1 = _mm_loadu_si128((const __m128i *) s1); + b2 = _mm_loadu_si128((const __m128i *) s2); + + /* do the actual compare */ + m += (r = _mm_cmpestri(b1, n - m, b2, 16, + _SIDD_CMP_EQUAL_EACH | _SIDD_MASKED_NEGATIVE_POLARITY)); + + s1 += 16; + s2 += 16; + } while (r == 16); + + return ((m == n) ? 0 : 1); +} + +/* Range of values of uppercase characters. We only use the first 2 bytes. 
*/ +static char scmemcmp_uppercase[16] __attribute__((aligned(16))) = { + 'A', 'Z', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; + +/** \brief compare two buffers in a case insensitive way + * \param s1 buffer already in lowercase + * \param s2 buffer with mixed upper and lowercase + */ +static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t n) +{ + __m128i b1, b2, mask; + + int r; + /* counter for how far we already matched in the buffer */ + size_t m = 0; + + __m128i ucase = _mm_load_si128((const __m128i *) scmemcmp_uppercase); + __m128i nulls = _mm_setzero_si128(); + __m128i uplow = _mm_set1_epi8(0x20); + + do { + /* apparently we can't just read 16 bytes even though + * it almost always works fine :) */ + if (likely(n - m < 16)) { + return MemcmpLowercase(s1, s2, n - m); + } + + b1 = _mm_loadu_si128((const __m128i *) s1); + b2 = _mm_loadu_si128((const __m128i *) s2); + size_t len = n - m; + + /* The first step is creating a mask that is FF for all uppercase + * characters, 00 for all others */ + mask = _mm_cmpestrm(ucase, 2, b2, len, _SIDD_CMP_RANGES | _SIDD_UNIT_MASK); + /* Next we use that mask to create a new: this one has 0x20 for + * the uppercase chars, 00 for all other. */ + mask = _mm_blendv_epi8(nulls, uplow, mask); + /* finally, merge the mask and the buffer converting the + * uppercase to lowercase */ + b2 = _mm_add_epi8(b2, mask); + + /* search using our converted buffer */ + m += (r = _mm_cmpestri(b1, len, b2, 16, + _SIDD_CMP_EQUAL_EACH | _SIDD_MASKED_NEGATIVE_POLARITY)); + + s1 += 16; + s2 += 16; + } while (r == 16); + + return ((m == n) ? 0 : 1); +} + +#elif defined(__SSE4_1__) + +#include <smmintrin.h> + +#define SCMEMCMP_BYTES 16 + +static inline int SCMemcmp(const void *s1, const void *s2, size_t len) +{ + size_t offset = 0; + __m128i b1, b2, c; + + do { + /* apparently we can't just read 16 bytes even though + * it almost always works fine :) */ + if (likely(len - offset < 16)) { + return memcmp(s1, s2, len - offset) ? 
1 : 0; + } + + /* do unaligned loads using _mm_loadu_si128. On my Core2 E6600 using + * _mm_lddqu_si128 was about 2% slower even though it's supposed to + * be faster. */ + b1 = _mm_loadu_si128((const __m128i *) s1); + b2 = _mm_loadu_si128((const __m128i *) s2); + c = _mm_cmpeq_epi8(b1, b2); + + int diff = len - offset; + if (diff < 16) { + int rmask = ~(0xFFFFFFFF << diff); + + if ((_mm_movemask_epi8(c) & rmask) != rmask) { + return 1; + } + } else { + if (_mm_movemask_epi8(c) != 0x0000FFFF) { + return 1; + } + } + + offset += SCMEMCMP_BYTES; + s1 += SCMEMCMP_BYTES; + s2 += SCMEMCMP_BYTES; + } while (len > offset); + + return 0; +} + +#define UPPER_LOW 0x40 /* "A" - 1 */ +#define UPPER_HIGH 0x5B /* "Z" + 1 */ + +static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t len) +{ + size_t offset = 0; + __m128i b1, b2, mask1, mask2, upper1, upper2, nulls, uplow; + + /* setup registers for upper to lower conversion */ + upper1 = _mm_set1_epi8(UPPER_LOW); + upper2 = _mm_set1_epi8(UPPER_HIGH); + nulls = _mm_setzero_si128(); + uplow = _mm_set1_epi8(0x20); + + do { + /* apparently we can't just read 16 bytes even though + * it almost always works fine :) */ + if (likely(len - offset < 16)) { + return MemcmpLowercase(s1, s2, len - offset); + } + + /* unaligned loading of the bytes to compare */ + b1 = _mm_loadu_si128((const __m128i *) s1); + b2 = _mm_loadu_si128((const __m128i *) s2); + + /* mark all chars bigger than upper1 */ + mask1 = _mm_cmpgt_epi8(b2, upper1); + /* mark all chars lower than upper2 */ + mask2 = _mm_cmplt_epi8(b2, upper2); + /* merge the two, leaving only those that are true in both */ + mask1 = _mm_cmpeq_epi8(mask1, mask2); + /* Next we use that mask to create a new: this one has 0x20 for + * the uppercase chars, 00 for all other. 
*/ + mask1 = _mm_blendv_epi8(nulls, uplow, mask1); + + /* add to b2, converting uppercase to lowercase */ + b2 = _mm_add_epi8(b2, mask1); + + /* now all is lowercase, let's do the actual compare (reuse mask1 reg) */ + mask1 = _mm_cmpeq_epi8(b1, b2); + + int diff = len - offset; + if (diff < 16) { + int rmask = ~(0xFFFFFFFF << diff); + + if ((_mm_movemask_epi8(mask1) & rmask) != rmask) { + return 1; + } + } else { + if (_mm_movemask_epi8(mask1) != 0x0000FFFF) { + return 1; + } + } + + offset += SCMEMCMP_BYTES; + s1 += SCMEMCMP_BYTES; + s2 += SCMEMCMP_BYTES; + } while (len > offset); + + return 0; +} + + + +#elif defined(__SSE3__) + +#include <pmmintrin.h> /* for SSE3 */ + +#define SCMEMCMP_BYTES 16 + +static inline int SCMemcmp(const void *s1, const void *s2, size_t len) +{ + size_t offset = 0; + __m128i b1, b2, c; + + do { + /* apparently we can't just read 16 bytes even though + * it almost always works fine :) */ + if (likely(len - offset < 16)) { + return memcmp(s1, s2, len - offset) ? 1 : 0; + } + + /* do unaligned loads using _mm_loadu_si128. On my Core2 E6600 using + * _mm_lddqu_si128 was about 2% slower even though it's supposed to + * be faster. 
*/ + b1 = _mm_loadu_si128((const __m128i *) s1); + b2 = _mm_loadu_si128((const __m128i *) s2); + c = _mm_cmpeq_epi8(b1, b2); + + int diff = len - offset; + if (diff < 16) { + int rmask = ~(0xFFFFFFFF << diff); + + if ((_mm_movemask_epi8(c) & rmask) != rmask) { + return 1; + } + } else { + if (_mm_movemask_epi8(c) != 0x0000FFFF) { + return 1; + } + } + + offset += SCMEMCMP_BYTES; + s1 += SCMEMCMP_BYTES; + s2 += SCMEMCMP_BYTES; + } while (len > offset); + + return 0; +} + +#define UPPER_LOW 0x40 /* "A" - 1 */ +#define UPPER_HIGH 0x5B /* "Z" + 1 */ +#define UPPER_DELTA 0xDF /* 0xFF - 0x20 */ + +static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t len) +{ + size_t offset = 0; + __m128i b1, b2, mask1, mask2, upper1, upper2, delta; + + /* setup registers for upper to lower conversion */ + upper1 = _mm_set1_epi8(UPPER_LOW); + upper2 = _mm_set1_epi8(UPPER_HIGH); + delta = _mm_set1_epi8(UPPER_DELTA); + + do { + /* apparently we can't just read 16 bytes even though + * it almost always works fine :) */ + if (likely(len - offset < 16)) { + return MemcmpLowercase(s1, s2, len - offset); + } + + /* unaligned loading of the bytes to compare */ + b1 = _mm_loadu_si128((const __m128i *) s1); + b2 = _mm_loadu_si128((const __m128i *) s2); + + /* mark all chars bigger than upper1 */ + mask1 = _mm_cmpgt_epi8(b2, upper1); + /* mark all chars lower than upper2 */ + mask2 = _mm_cmplt_epi8(b2, upper2); + /* merge the two, leaving only those that are true in both */ + mask1 = _mm_cmpeq_epi8(mask1, mask2); + + /* sub delta leaves 0x20 only for uppercase positions, the + rest is 0x00 due to the saturation (reuse mask1 reg)*/ + mask1 = _mm_subs_epu8(mask1, delta); + + /* add to b2, converting uppercase to lowercase */ + b2 = _mm_add_epi8(b2, mask1); + + /* now all is lowercase, let's do the actual compare (reuse mask1 reg) */ + mask1 = _mm_cmpeq_epi8(b1, b2); + + int diff = len - offset; + if (diff < 16) { + int rmask = ~(0xFFFFFFFF << diff); + + if 
((_mm_movemask_epi8(mask1) & rmask) != rmask) { + return 1; + } + } else { + if (_mm_movemask_epi8(mask1) != 0x0000FFFF) { + return 1; + } + } + + offset += SCMEMCMP_BYTES; + s1 += SCMEMCMP_BYTES; + s2 += SCMEMCMP_BYTES; + } while (len > offset); + + return 0; +} + +#elif defined(__tile__) + +#include <ctype.h> + +/* Compare to non-zero sequence of bytes. */ +static inline int SCMemcmpNZ(const void *s1, const void *s2, size_t len) +{ + uint64_t b1, w1, aligned1; + uint64_t b2, w2, aligned2; + + /* Load aligned words containing the beginning of each string. + * These loads don't trigger unaligned events. + */ + w1 = __insn_ldna(s1); + w2 = __insn_ldna(s2); + /* Can't just read next 8 bytes because it might go past the end + * of a page. */ + while (len > 8) { + /* Here, the buffer extends into the next word by at least one + * byte, so it is safe to read the next word. Do an aligned + * loads on the next word. Then use the two words to create + * an aligned word from each string. */ + b1 = __insn_ldna(s1 + 8); + b2 = __insn_ldna(s2 + 8); + aligned1 = __insn_dblalign(w1, b1, s1); + aligned2 = __insn_dblalign(w2, b2, s2); + if (aligned1 != aligned2) + return 1; + + /* Move forward one word (8 bytes) */ + w1 = b1; + w2 = b2; + len -= 8; + s1 += 8; + s2 += 8; + } + /* Process the last up-to 8 bytes. */ + do { + if (*(char*)s1 != *(char*)s2) + return 1; + s1++; + s2++; + len--; + } while (len); + + return 0; +} + +/* Compare two sequences of bytes. */ +static inline int SCMemcmp(const void *s1, const void *s2, size_t len) +{ + if (len == 0) + return 0; + return SCMemcmpNZ(s1, s2, len); +} +/** \brief Convert 8 characters to lower case using SIMD. + * \param Word containing the 8 bytes. + * \return Word containing 8-bytes each converted to lowercase. + */ +static inline uint64_t +vec_tolower(uint64_t cc) +{ + /* For Uppercases letters, add 32 to convert to lower case. 
*/ + uint64_t less_than_eq_Z = __insn_v1cmpltui (cc, 'Z' + 1); + uint64_t less_than_A = __insn_v1cmpltui (cc, 'A'); + uint64_t is_upper = __insn_v1cmpne (less_than_eq_Z, less_than_A); + return __insn_v1add (cc,__insn_v1shli (is_upper, 5)); +} + +/** \brief compare two buffers in a case insensitive way + * \param s1 buffer already in lowercase + * \param s2 buffer with mixed upper and lowercase + */ +static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t len) +{ + uint64_t b1, w1, aligned1; + uint64_t b2, w2, aligned2; + + if (len == 0) + return 0; + + /* TODO Check for already aligned cases. To optimize. */ + + /* Load word containing the beginning of each string. + * These loads don't trigger unaligned events. + */ + w1 = __insn_ldna(s1); + w2 = __insn_ldna(s2); + /* Can't just read next 8 bytes because it might go past the end + * of a page. */ + while (len > 8) { + /* Here, the buffer extends into the next word by at least one + * byte, so it is safe to read the next word. Do aligned + * loads on next word. Then use the two words to create an + * aligned word from each string. */ + b1 = __insn_ldna(s1 + 8); + b2 = __insn_ldna(s2 + 8); + aligned1 = __insn_dblalign(w1, b1, s1); + aligned2 = vec_tolower(__insn_dblalign(w2, b2, s2)); + if (aligned1 != aligned2) + return 1; + + /* Move forward one word (8 bytes) */ + w1 = b1; + w2 = b2; + len -= 8; + s1 += 8; + s2 += 8; + } + + do { + if (*(char*)s1 != tolower(*(char*)s2)) + return 1; + s1++; + s2++; + len--; + } while (len); + + return 0; +} + +#else + +/* No SIMD support, fall back to plain memcmp and a home grown lowercase one */ + +/* wrapper around memcmp to match the retvals of the SIMD implementations */ +#define SCMemcmp(a,b,c) ({ \ + memcmp((a), (b), (c)) ? 1 : 0; \ +}) + +static inline int SCMemcmpLowercase(const void *s1, const void *s2, size_t len) +{ + return MemcmpLowercase(s1, s2, len); +} + +#endif /* SIMD */ + +#endif /* __UTIL_MEMCMP_H__ */ + |