aboutsummaryrefslogtreecommitdiffstats
path: root/framework/src/suricata/src/util-spm-bs2bm.c
blob: d6529df85d12b9c5d825f72b71664be90d31bbae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
/* Copyright (C) 2007-2010 Open Information Security Foundation
 *
 * You can copy, redistribute or modify this Program under the terms of
 * the GNU General Public License version 2 as published by the Free
 * Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

/**
 * \file
 *
 * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
 *
 * Bs2Bm uses a simple context array to determine the characters
 * that are not present in the pattern. This way, on partial matches
 * broken by a char that is not present, we can skip to the next
 * character, making fewer checks.
 */

#include "suricata-common.h"
#include "suricata.h"

#include "util-spm-bs2bm.h"

/**
 * \brief Build the bad characters table used by Bs2Bm().
 *
 * All ALPHABET_SIZE entries are first flagged as bad (1); afterwards the
 * entry of every byte value that occurs in the needle is cleared (0).
 *
 * \param needle pointer to the pattern we are searching for
 * \param needle_len number of needle bytes to take into account
 * \param badchars output table indexed by byte value. It is fully
 *                 overwritten and needs room for ALPHABET_SIZE entries.
 *                 An entry left at 1 means that byte value can't be part
 *                 of the needle, so the search may skip past it.
 */
void Bs2BmBadchars(const uint8_t *needle, uint16_t needle_len, uint8_t *badchars)
{
    const uint8_t *cursor = needle;
    uint16_t remaining = needle_len;
    uint32_t slot = 0;

    /* start out assuming that no byte value occurs in the needle */
    while (slot < ALPHABET_SIZE) {
        badchars[slot] = 1;
        slot++;
    }

    /* every byte that does occur in the needle is not a bad char */
    while (remaining > 0) {
        badchars[*cursor] = 0;
        cursor++;
        remaining--;
    }
}

/**
 * \brief Build the bad characters table used by Bs2BmNocase().
 *
 * All ALPHABET_SIZE entries are first flagged as bad (1); afterwards, for
 * every needle byte, the entry selected by u8_tolower() of that byte is
 * cleared (0). Entries are therefore only cleared at case-folded indexes.
 *
 * \param needle pointer to the pattern we are searching for
 * \param needle_len number of needle bytes to take into account
 * \param badchars output table indexed by byte value. It is fully
 *                 overwritten and needs room for ALPHABET_SIZE entries.
 *                 An entry left at 1 means that (folded) byte value can't
 *                 be part of the needle, so the search may skip past it.
 */
void Bs2BmBadcharsNocase(const uint8_t *needle, uint16_t needle_len, uint8_t *badchars)
{
    const uint8_t *cursor = needle;
    uint16_t remaining = needle_len;
    uint32_t slot = 0;

    /* start out assuming that no byte value occurs in the needle */
    while (slot < ALPHABET_SIZE) {
        badchars[slot] = 1;
        slot++;
    }

    /* clear the flag at the case-folded index of each needle byte */
    while (remaining > 0) {
        badchars[u8_tolower(*cursor)] = 0;
        cursor++;
        remaining--;
    }
}


/**
 * \brief Basic case sensitive search helped by a bad characters array.
 *        badchars holds a flag per byte value telling whether that byte
 *        can't be inside the needle, which allows longer skips after a
 *        partial match is broken.
 *
 * \param haystack pointer to the buffer to search in
 * \param haystack_len length limit of the buffer
 * \param needle pointer to the pattern we are searching for
 * \param needle_len length limit of the needle
 * \param badchars pointer to an array of bad chars prepared by Bs2BmBadchars()
 *
 * \retval ptr to start of the match; NULL if no match, if the needle is
 *         empty or if the needle is longer than the haystack
 */
uint8_t * Bs2Bm(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint16_t needle_len, uint8_t badchars[])
{
    uint32_t last_start;
    uint32_t start = 0;

    if (needle_len == 0 || needle_len > haystack_len) {
        return NULL;
    }

    /* highest offset at which the whole needle still fits in the haystack */
    last_start = haystack_len - needle_len;

    while (start <= last_start) {
        const uint8_t *window = haystack + start;
        uint32_t matched = 0;

        /* count how many leading bytes of the needle agree with the window */
        while (matched < needle_len && window[matched] == needle[matched]) {
            matched++;
        }

        /* if we ran out of needle we fully matched */
        if (matched == needle_len) {
            return (uint8_t *)window;
        }

        /* A partial match (at least one byte agreed) was broken by
         * window[matched]. If that byte can't be inside the needle, no
         * match can overlap it, so resume right behind it. A mismatch on
         * the very first byte never consults badchars. */
        if (matched > 0 && badchars[window[matched]] == 1) {
            start += matched;
        }
        start++;
    }

    return NULL;
}

/**
 * \brief Basic case insensitive search helped by a bad characters array.
 *        Bytes of the haystack and of the needle are both passed through
 *        u8_tolower() before being compared. badchars holds a flag per
 *        (folded) byte value telling whether that byte can't be inside the
 *        needle, which allows longer skips after a partial match is broken.
 *
 * \param haystack pointer to the buffer to search in
 * \param haystack_len length limit of the buffer
 * \param needle pointer to the pattern we are searching for
 * \param needle_len length limit of the needle
 * \param badchars pointer to an array of bad chars. It is indexed here with
 *                 u8_tolower() of the haystack byte, which matches the table
 *                 layout produced by Bs2BmBadcharsNocase()
 *
 * \retval ptr to start of the match; NULL if no match, if the needle is
 *         empty or if the needle is longer than the haystack
 */
uint8_t *Bs2BmNocase(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint16_t needle_len, uint8_t badchars[])
{
    uint32_t last_start;
    uint32_t start = 0;

    if (needle_len == 0 || needle_len > haystack_len) {
        return NULL;
    }

    /* highest offset at which the whole needle still fits in the haystack */
    last_start = haystack_len - needle_len;

    while (start <= last_start) {
        const uint8_t *window = haystack + start;
        uint32_t matched = 0;

        /* count how many leading bytes agree once both sides are folded */
        while (matched < needle_len &&
               u8_tolower(window[matched]) == u8_tolower(needle[matched])) {
            matched++;
        }

        /* if we ran out of needle we fully matched */
        if (matched == needle_len) {
            return (uint8_t *)window;
        }

        /* A partial match (at least one byte agreed) was broken by
         * window[matched]. If its folded value can't be inside the needle,
         * no match can overlap it, so resume right behind it. A mismatch
         * on the very first byte never consults badchars. */
        if (matched > 0 && badchars[u8_tolower(window[matched])] == 1) {
            start += matched;
        }
        start++;
    }

    return NULL;
}