summaryrefslogtreecommitdiffstats
path: root/rubbos/app/httpd-2.0.64/srclib/pcre/study.c
diff options
context:
space:
mode:
authorhongbotian <hongbo.tianhongbo@huawei.com>2015-11-30 01:45:08 -0500
committerhongbotian <hongbo.tianhongbo@huawei.com>2015-11-30 01:45:08 -0500
commite8ec7aa8e38a93f5b034ac74cebce5de23710317 (patch)
treeaa031937bf856c1f8d6ad7877b8d2cb0224da5ef /rubbos/app/httpd-2.0.64/srclib/pcre/study.c
parentcc40af334e619bb549038238507407866f774f8f (diff)
upload http
JIRA: BOTTLENECK-10 Change-Id: I7598427ff904df438ce77c2819ee48ac75ffa8da Signed-off-by: hongbotian <hongbo.tianhongbo@huawei.com>
Diffstat (limited to 'rubbos/app/httpd-2.0.64/srclib/pcre/study.c')
-rw-r--r--rubbos/app/httpd-2.0.64/srclib/pcre/study.c401
1 files changed, 401 insertions, 0 deletions
diff --git a/rubbos/app/httpd-2.0.64/srclib/pcre/study.c b/rubbos/app/httpd-2.0.64/srclib/pcre/study.c
new file mode 100644
index 00000000..f924543d
--- /dev/null
+++ b/rubbos/app/httpd-2.0.64/srclib/pcre/study.c
@@ -0,0 +1,401 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/*
+This is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language. See
+the file Tech.Notes for some information on the internals.
+
+Written by: Philip Hazel <ph10@cam.ac.uk>
+
+ Copyright (c) 1997-2001 University of Cambridge
+
+-----------------------------------------------------------------------------
+Permission is granted to anyone to use this software for any purpose on any
+computer system, and to redistribute it freely, subject to the following
+restrictions:
+
+1. This software is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+2. The origin of this software must not be misrepresented, either by
+ explicit claim or by omission.
+
+3. Altered versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+
+4. If PCRE is embedded in any software that is released under the GNU
+ General Purpose Licence (GPL), then the terms of that licence shall
+ supersede any condition above with which it is incompatible.
+-----------------------------------------------------------------------------
+*/
+
+
+/* Include the internals header, which itself includes Standard C headers plus
+the external pcre header. */
+
+#include "internal.h"
+
+
+
+/*************************************************
+* Set a bit and maybe its alternate case *
+*************************************************/
+
+/* Given a character, set its bit in the table, and also the bit for the other
+version of a letter if we are caseless.
+
+Arguments:
+ start_bits points to the bit map
+ c is the character
+ caseless the caseless flag
+ cd the block with char table pointers
+
+Returns: nothing
+*/
+
+static void
+set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
+{
+start_bits[c/8] |= (1 << (c&7));
+if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
+ start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
+}
+
+
+
+/*************************************************
+* Create bitmap of starting chars *
+*************************************************/
+
+/* This function scans a compiled unanchored expression and attempts to build a
+bitmap of the set of initial characters. If it can't, it returns FALSE. As time
+goes by, we may be able to get more clever at doing this.
+
+Arguments:
+ code points to an expression
+ start_bits points to a 32-byte table, initialized to 0
+ caseless the current state of the caseless flag
+ cd the block with char table pointers
+
+Returns: TRUE if table built, FALSE otherwise
+*/
+
+static BOOL
+set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
+ compile_data *cd)
+{
+register int c;
+
+/* This next statement and the later reference to dummy are here in order to
+trick the optimizer of the IBM C compiler for OS/2 into generating correct
+code. Apparently IBM isn't going to fix the problem, and we would rather not
+disable optimization (in this module it actually makes a big difference, and
+the pcre module can use all the optimization it can get). */
+
+volatile int dummy;
+
+do
+ {
+ const uschar *tcode = code + 3;
+ BOOL try_next = TRUE;
+
+ while (try_next)
+ {
+ /* If a branch starts with a bracket or a positive lookahead assertion,
+ recurse to set bits from within them. That's all for this branch. */
+
+ if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
+ {
+ if (!set_start_bits(tcode, start_bits, caseless, cd))
+ return FALSE;
+ try_next = FALSE;
+ }
+
+ else switch(*tcode)
+ {
+ default:
+ return FALSE;
+
+ /* Skip over extended extraction bracket number */
+
+ case OP_BRANUMBER:
+ tcode += 3;
+ break;
+
+ /* Skip over lookbehind and negative lookahead assertions */
+
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
+ tcode += 3;
+ break;
+
+ /* Skip over an option setting, changing the caseless flag */
+
+ case OP_OPT:
+ caseless = (tcode[1] & PCRE_CASELESS) != 0;
+ tcode += 2;
+ break;
+
+ /* BRAZERO does the bracket, but carries on. */
+
+ case OP_BRAZERO:
+ case OP_BRAMINZERO:
+ if (!set_start_bits(++tcode, start_bits, caseless, cd))
+ return FALSE;
+ dummy = 1;
+ do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
+ tcode += 3;
+ break;
+
+ /* Single-char * or ? sets the bit and tries the next item */
+
+ case OP_STAR:
+ case OP_MINSTAR:
+ case OP_QUERY:
+ case OP_MINQUERY:
+ set_bit(start_bits, tcode[1], caseless, cd);
+ tcode += 2;
+ break;
+
+ /* Single-char upto sets the bit and tries the next */
+
+ case OP_UPTO:
+ case OP_MINUPTO:
+ set_bit(start_bits, tcode[3], caseless, cd);
+ tcode += 4;
+ break;
+
+ /* At least one single char sets the bit and stops */
+
+ case OP_EXACT: /* Fall through */
+ tcode++;
+
+ case OP_CHARS: /* Fall through */
+ tcode++;
+
+ case OP_PLUS:
+ case OP_MINPLUS:
+ set_bit(start_bits, tcode[1], caseless, cd);
+ try_next = FALSE;
+ break;
+
+ /* Single character type sets the bits and stops */
+
+ case OP_NOT_DIGIT:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= ~cd->cbits[c+cbit_digit];
+ try_next = FALSE;
+ break;
+
+ case OP_DIGIT:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= cd->cbits[c+cbit_digit];
+ try_next = FALSE;
+ break;
+
+ case OP_NOT_WHITESPACE:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= ~cd->cbits[c+cbit_space];
+ try_next = FALSE;
+ break;
+
+ case OP_WHITESPACE:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= cd->cbits[c+cbit_space];
+ try_next = FALSE;
+ break;
+
+ case OP_NOT_WORDCHAR:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= ~cd->cbits[c+cbit_word];
+ try_next = FALSE;
+ break;
+
+ case OP_WORDCHAR:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= cd->cbits[c+cbit_word];
+ try_next = FALSE;
+ break;
+
+ /* One or more character type fudges the pointer and restarts, knowing
+ it will hit a single character type and stop there. */
+
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ tcode++;
+ break;
+
+ case OP_TYPEEXACT:
+ tcode += 3;
+ break;
+
+ /* Zero or more repeats of character types set the bits and then
+ try again. */
+
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ tcode += 2; /* Fall through */
+
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ switch(tcode[1])
+ {
+ case OP_NOT_DIGIT:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= ~cd->cbits[c+cbit_digit];
+ break;
+
+ case OP_DIGIT:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= cd->cbits[c+cbit_digit];
+ break;
+
+ case OP_NOT_WHITESPACE:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= ~cd->cbits[c+cbit_space];
+ break;
+
+ case OP_WHITESPACE:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= cd->cbits[c+cbit_space];
+ break;
+
+ case OP_NOT_WORDCHAR:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= ~cd->cbits[c+cbit_word];
+ break;
+
+ case OP_WORDCHAR:
+ for (c = 0; c < 32; c++)
+ start_bits[c] |= cd->cbits[c+cbit_word];
+ break;
+ }
+
+ tcode += 2;
+ break;
+
+ /* Character class: set the bits and either carry on or not,
+ according to the repeat count. */
+
+ case OP_CLASS:
+ {
+ tcode++;
+ for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
+ tcode += 32;
+ switch (*tcode)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
+ tcode++;
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
+ else try_next = FALSE;
+ break;
+
+ default:
+ try_next = FALSE;
+ break;
+ }
+ }
+ break; /* End of class handling */
+
+ } /* End of switch */
+ } /* End of try_next loop */
+
+ code += (code[1] << 8) + code[2]; /* Advance to next branch */
+ }
+while (*code == OP_ALT);
+return TRUE;
+}
+
+
+
+/*************************************************
+* Study a compiled expression *
+*************************************************/
+
+/* This function is handed a compiled expression that it must study to produce
+information that will speed up the matching. It returns a pcre_extra block
+which then gets handed back to pcre_exec().
+
+Arguments:
+ re points to the compiled expression
+ options contains option bits
+ errorptr points to where to place error messages;
+ set NULL unless error
+
+Returns: pointer to a pcre_extra block,
+ NULL on error or if no optimization possible
+*/
+
+pcre_extra *
+pcre_study(const pcre *external_re, int options, const char **errorptr)
+{
+uschar start_bits[32];
+real_pcre_extra *extra;
+const real_pcre *re = (const real_pcre *)external_re;
+compile_data compile_block;
+
+*errorptr = NULL;
+
+if (re == NULL || re->magic_number != MAGIC_NUMBER)
+ {
+ *errorptr = "argument is not a compiled regular expression";
+ return NULL;
+ }
+
+if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
+ {
+ *errorptr = "unknown or incorrect option bit(s) set";
+ return NULL;
+ }
+
+/* For an anchored pattern, or an unchored pattern that has a first char, or a
+multiline pattern that matches only at "line starts", no further processing at
+present. */
+
+if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
+ return NULL;
+
+/* Set the character tables in the block which is passed around */
+
+compile_block.lcc = re->tables + lcc_offset;
+compile_block.fcc = re->tables + fcc_offset;
+compile_block.cbits = re->tables + cbits_offset;
+compile_block.ctypes = re->tables + ctypes_offset;
+
+/* See if we can find a fixed set of initial characters for the pattern. */
+
+memset(start_bits, 0, 32 * sizeof(uschar));
+if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0,
+ &compile_block)) return NULL;
+
+/* Get an "extra" block and put the information therein. */
+
+extra = (real_pcre_extra *)(pcre_malloc)(sizeof(real_pcre_extra));
+
+if (extra == NULL)
+ {
+ *errorptr = "failed to get memory";
+ return NULL;
+ }
+
+extra->options = PCRE_STUDY_MAPPED;
+memcpy(extra->start_bits, start_bits, sizeof(start_bits));
+
+return (pcre_extra *)extra;
+}
+
+/* End of study.c */