diff options
Diffstat (limited to 'rubbos/app/httpd-2.0.64/modules/mappers/mod_speling.c')
-rw-r--r-- | rubbos/app/httpd-2.0.64/modules/mappers/mod_speling.c | 532 |
1 files changed, 0 insertions, 532 deletions
diff --git a/rubbos/app/httpd-2.0.64/modules/mappers/mod_speling.c b/rubbos/app/httpd-2.0.64/modules/mappers/mod_speling.c deleted file mode 100644 index 9520cbee..00000000 --- a/rubbos/app/httpd-2.0.64/modules/mappers/mod_speling.c +++ /dev/null @@ -1,532 +0,0 @@ -/* Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "apr.h" -#include "apr_file_io.h" -#include "apr_strings.h" -#include "apr_lib.h" - -#define APR_WANT_STRFUNC -#include "apr_want.h" - -#define WANT_BASENAME_MATCH - -#include "httpd.h" -#include "http_core.h" -#include "http_config.h" -#include "http_request.h" -#include "http_log.h" - -/* mod_speling.c - by Alexei Kosut <akosut@organic.com> June, 1996 - * - * This module is transparent, and simple. It attempts to correct - * misspellings of URLs that users might have entered, namely by checking - * capitalizations. If it finds a match, it sends a redirect. - * - * 08-Aug-1997 <Martin.Kraemer@Mch.SNI.De> - * o Upgraded module interface to apache_1.3a2-dev API (more NULL's in - * speling_module). - * o Integrated tcsh's "spelling correction" routine which allows one - * misspelling (character insertion/omission/typo/transposition). - * Rewrote it to ignore case as well. This ought to catch the majority - * of misspelled requests. - * o Commented out the second pass where files' suffixes are stripped. - * Given the better hit rate of the first pass, this rather ugly - * (request index.html, receive index.db ?!?!) solution can be - * omitted. - * o wrote a "kind of" html page for mod_speling - * - * Activate it with "CheckSpelling On" - */ - -module AP_MODULE_DECLARE_DATA speling_module; - -typedef struct { - int enabled; -} spconfig; - -/* - * Create a configuration specific to this module for a server or directory - * location, and fill it with the default settings. - * - * The API says that in the absence of a merge function, the record for the - * closest ancestor is used exclusively. That's what we want, so we don't - * bother to have such a function. - */ - -static void *mkconfig(apr_pool_t *p) -{ - spconfig *cfg = apr_pcalloc(p, sizeof(spconfig)); - - cfg->enabled = 0; - return cfg; -} - -/* - * Respond to a callback to create configuration record for a server or - * vhost environment. - */ -static void *create_mconfig_for_server(apr_pool_t *p, server_rec *s) -{ - return mkconfig(p); -} - -/* - * Respond to a callback to create a config record for a specific directory. - */ -static void *create_mconfig_for_directory(apr_pool_t *p, char *dir) -{ - return mkconfig(p); -} - -/* - * Handler for the CheckSpelling directive, which is FLAG. - */ -static const char *set_speling(cmd_parms *cmd, void *mconfig, int arg) -{ - spconfig *cfg = (spconfig *) mconfig; - - cfg->enabled = arg; - return NULL; -} - -/* - * Define the directives specific to this module. This structure is referenced - * later by the 'module' structure. - */ -static const command_rec speling_cmds[] = -{ - AP_INIT_FLAG("CheckSpelling", set_speling, NULL, OR_OPTIONS, - "whether or not to fix miscapitalized/misspelled requests"), - { NULL } -}; - -typedef enum { - SP_IDENTICAL = 0, - SP_MISCAPITALIZED = 1, - SP_TRANSPOSITION = 2, - SP_MISSINGCHAR = 3, - SP_EXTRACHAR = 4, - SP_SIMPLETYPO = 5, - SP_VERYDIFFERENT = 6 -} sp_reason; - -static const char *sp_reason_str[] = -{ - "identical", - "miscapitalized", - "transposed characters", - "character missing", - "extra character", - "mistyped character", - "common basename", -}; - -typedef struct { - const char *name; - sp_reason quality; -} misspelled_file; - -/* - * spdist() is taken from Kernighan & Pike, - * _The_UNIX_Programming_Environment_ - * and adapted somewhat to correspond better to psychological reality. - * (Note the changes to the return values) - * - * According to Pollock and Zamora, CACM April 1984 (V. 27, No. 4), - * page 363, the correct order for this is: - * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION - * thus, it was exactly backwards in the old version. -- PWP - * - * This routine was taken out of tcsh's spelling correction code - * (tcsh-6.07.04) and re-converted to apache data types ("char" type - * instead of tcsh's NLS'ed "Char"). Plus it now ignores the case - * during comparisons, so is a "approximate strcasecmp()". - * NOTE that is still allows only _one_ real "typo", - * it does NOT try to correct multiple errors. - */ - -static sp_reason spdist(const char *s, const char *t) -{ - for (; apr_tolower(*s) == apr_tolower(*t); t++, s++) { - if (*t == '\0') { - return SP_MISCAPITALIZED; /* exact match (sans case) */ - } - } - if (*s) { - if (*t) { - if (s[1] && t[1] && apr_tolower(*s) == apr_tolower(t[1]) - && apr_tolower(*t) == apr_tolower(s[1]) - && strcasecmp(s + 2, t + 2) == 0) { - return SP_TRANSPOSITION; /* transposition */ - } - if (strcasecmp(s + 1, t + 1) == 0) { - return SP_SIMPLETYPO; /* 1 char mismatch */ - } - } - if (strcasecmp(s + 1, t) == 0) { - return SP_EXTRACHAR; /* extra character */ - } - } - if (*t && strcasecmp(s, t + 1) == 0) { - return SP_MISSINGCHAR; /* missing character */ - } - return SP_VERYDIFFERENT; /* distance too large to fix. */ -} - -static int sort_by_quality(const void *left, const void *rite) -{ - return (int) (((misspelled_file *) left)->quality) - - (int) (((misspelled_file *) rite)->quality); -} - -static int check_speling(request_rec *r) -{ - spconfig *cfg; - char *good, *bad, *postgood, *url; - apr_finfo_t dirent; - int filoc, dotloc, urlen, pglen; - apr_array_header_t *candidates = NULL; - apr_dir_t *dir; - - cfg = ap_get_module_config(r->per_dir_config, &speling_module); - if (!cfg->enabled) { - return DECLINED; - } - - /* We only want to worry about GETs */ - if (r->method_number != M_GET) { - return DECLINED; - } - - /* We've already got a file of some kind or another */ - if (r->finfo.filetype != 0) { - return DECLINED; - } - - /* Not a file request */ - if (r->proxyreq || !r->filename) { - return DECLINED; - } - - /* This is a sub request - don't mess with it */ - if (r->main) { - return DECLINED; - } - - /* - * The request should end up looking like this: - * r->uri: /correct-url/mispelling/more - * r->filename: /correct-file/mispelling r->path_info: /more - * - * So we do this in steps. First break r->filename into two pieces - */ - - filoc = ap_rind(r->filename, '/'); - /* - * Don't do anything if the request doesn't contain a slash, or - * requests "/" - */ - if (filoc == -1 || strcmp(r->uri, "/") == 0) { - return DECLINED; - } - - /* good = /correct-file */ - good = apr_pstrndup(r->pool, r->filename, filoc); - /* bad = mispelling */ - bad = apr_pstrdup(r->pool, r->filename + filoc + 1); - /* postgood = mispelling/more */ - postgood = apr_pstrcat(r->pool, bad, r->path_info, NULL); - - urlen = strlen(r->uri); - pglen = strlen(postgood); - - /* Check to see if the URL pieces add up */ - if (strcmp(postgood, r->uri + (urlen - pglen))) { - return DECLINED; - } - - /* url = /correct-url */ - url = apr_pstrndup(r->pool, r->uri, (urlen - pglen)); - - /* Now open the directory and do ourselves a check... */ - if (apr_dir_open(&dir, good, r->pool) != APR_SUCCESS) { - /* Oops, not a directory... */ - return DECLINED; - } - - candidates = apr_array_make(r->pool, 2, sizeof(misspelled_file)); - - dotloc = ap_ind(bad, '.'); - if (dotloc == -1) { - dotloc = strlen(bad); - } - - while (apr_dir_read(&dirent, APR_FINFO_DIRENT, dir) == APR_SUCCESS) { - sp_reason q; - - /* - * If we end up with a "fixed" URL which is identical to the - * requested one, we must have found a broken symlink or some such. - * Do _not_ try to redirect this, it causes a loop! - */ - if (strcmp(bad, dirent.name) == 0) { - apr_dir_close(dir); - return OK; - } - - /* - * miscapitalization errors are checked first (like, e.g., lower case - * file, upper case request) - */ - else if (strcasecmp(bad, dirent.name) == 0) { - misspelled_file *sp_new; - - sp_new = (misspelled_file *) apr_array_push(candidates); - sp_new->name = apr_pstrdup(r->pool, dirent.name); - sp_new->quality = SP_MISCAPITALIZED; - } - - /* - * simple typing errors are checked next (like, e.g., - * missing/extra/transposed char) - */ - else if ((q = spdist(bad, dirent.name)) != SP_VERYDIFFERENT) { - misspelled_file *sp_new; - - sp_new = (misspelled_file *) apr_array_push(candidates); - sp_new->name = apr_pstrdup(r->pool, dirent.name); - sp_new->quality = q; - } - - /* - * The spdist() should have found the majority of the misspelled - * requests. It is of questionable use to continue looking for - * files with the same base name, but potentially of totally wrong - * type (index.html <-> index.db). - * I would propose to not set the WANT_BASENAME_MATCH define. - * 08-Aug-1997 <Martin.Kraemer@Mch.SNI.De> - * - * However, Alexei replied giving some reasons to add it anyway: - * > Oh, by the way, I remembered why having the - * > extension-stripping-and-matching stuff is a good idea: - * > - * > If you're using MultiViews, and have a file named foobar.html, - * > which you refer to as "foobar", and someone tried to access - * > "Foobar", mod_speling won't find it, because it won't find - * > anything matching that spelling. With the extension-munging, - * > it would locate "foobar.html". Not perfect, but I ran into - * > that problem when I first wrote the module. - */ - else { -#ifdef WANT_BASENAME_MATCH - /* - * Okay... we didn't find anything. Now we take out the hard-core - * power tools. There are several cases here. Someone might have - * entered a wrong extension (.htm instead of .html or vice - * versa) or the document could be negotiated. At any rate, now - * we just compare stuff before the first dot. If it matches, we - * figure we got us a match. This can result in wrong things if - * there are files of different content types but the same prefix - * (e.g. foo.gif and foo.html) This code will pick the first one - * it finds. Better than a Not Found, though. - */ - int entloc = ap_ind(dirent.name, '.'); - if (entloc == -1) { - entloc = strlen(dirent.name); - } - - if ((dotloc == entloc) - && !strncasecmp(bad, dirent.name, dotloc)) { - misspelled_file *sp_new; - - sp_new = (misspelled_file *) apr_array_push(candidates); - sp_new->name = apr_pstrdup(r->pool, dirent.name); - sp_new->quality = SP_VERYDIFFERENT; - } -#endif - } - } - apr_dir_close(dir); - - if (candidates->nelts != 0) { - /* Wow... we found us a mispelling. Construct a fixed url */ - char *nuri; - const char *ref; - misspelled_file *variant = (misspelled_file *) candidates->elts; - int i; - - ref = apr_table_get(r->headers_in, "Referer"); - - qsort((void *) candidates->elts, candidates->nelts, - sizeof(misspelled_file), sort_by_quality); - - /* - * Conditions for immediate redirection: - * a) the first candidate was not found by stripping the suffix - * AND b) there exists only one candidate OR the best match is not - * ambiguous - * then return a redirection right away. - */ - if (variant[0].quality != SP_VERYDIFFERENT - && (candidates->nelts == 1 - || variant[0].quality != variant[1].quality)) { - - nuri = ap_escape_uri(r->pool, apr_pstrcat(r->pool, url, - variant[0].name, - r->path_info, NULL)); - if (r->parsed_uri.query) - nuri = apr_pstrcat(r->pool, nuri, "?", r->parsed_uri.query, NULL); - - apr_table_setn(r->headers_out, "Location", - ap_construct_url(r->pool, nuri, r)); - - ap_log_rerror(APLOG_MARK, APLOG_INFO, APR_SUCCESS, - r, - ref ? "Fixed spelling: %s to %s from %s" - : "Fixed spelling: %s to %s", - r->uri, nuri, ref); - - return HTTP_MOVED_PERMANENTLY; - } - /* - * Otherwise, a "[300] Multiple Choices" list with the variants is - * returned. - */ - else { - apr_pool_t *p; - apr_table_t *notes; - apr_pool_t *sub_pool; - apr_array_header_t *t; - apr_array_header_t *v; - - - if (r->main == NULL) { - p = r->pool; - notes = r->notes; - } - else { - p = r->main->pool; - notes = r->main->notes; - } - - if (apr_pool_create(&sub_pool, p) != APR_SUCCESS) - return DECLINED; - - t = apr_array_make(sub_pool, candidates->nelts * 8 + 8, - sizeof(char *)); - v = apr_array_make(sub_pool, candidates->nelts * 5, - sizeof(char *)); - - /* Generate the response text. */ - - *(const char **)apr_array_push(t) = - "The document name you requested (<code>"; - *(const char **)apr_array_push(t) = ap_escape_html(sub_pool, r->uri); - *(const char **)apr_array_push(t) = - "</code>) could not be found on this server.\n" - "However, we found documents with names similar " - "to the one you requested.<p>" - "Available documents:\n<ul>\n"; - - for (i = 0; i < candidates->nelts; ++i) { - char *vuri; - const char *reason; - - reason = sp_reason_str[(int) (variant[i].quality)]; - /* The format isn't very neat... */ - vuri = apr_pstrcat(sub_pool, url, variant[i].name, r->path_info, - (r->parsed_uri.query != NULL) ? "?" : "", - (r->parsed_uri.query != NULL) - ? r->parsed_uri.query : "", - NULL); - *(const char **)apr_array_push(v) = "\""; - *(const char **)apr_array_push(v) = ap_escape_uri(sub_pool, vuri); - *(const char **)apr_array_push(v) = "\";\""; - *(const char **)apr_array_push(v) = reason; - *(const char **)apr_array_push(v) = "\""; - - *(const char **)apr_array_push(t) = "<li><a href=\""; - *(const char **)apr_array_push(t) = ap_escape_uri(sub_pool, vuri); - *(const char **)apr_array_push(t) = "\">"; - *(const char **)apr_array_push(t) = ap_escape_html(sub_pool, vuri); - *(const char **)apr_array_push(t) = "</a> ("; - *(const char **)apr_array_push(t) = reason; - *(const char **)apr_array_push(t) = ")\n"; - - /* - * when we have printed the "close matches" and there are - * more "distant matches" (matched by stripping the suffix), - * then we insert an additional separator text to suggest - * that the user LOOK CLOSELY whether these are really the - * files she wanted. - */ - if (i > 0 && i < candidates->nelts - 1 - && variant[i].quality != SP_VERYDIFFERENT - && variant[i + 1].quality == SP_VERYDIFFERENT) { - *(const char **)apr_array_push(t) = - "</ul>\nFurthermore, the following related " - "documents were found:\n<ul>\n"; - } - } - *(const char **)apr_array_push(t) = "</ul>\n"; - - /* If we know there was a referring page, add a note: */ - if (ref != NULL) { - *(const char **)apr_array_push(t) = - "Please consider informing the owner of the " - "<a href=\""; - *(const char **)apr_array_push(t) = ap_escape_uri(sub_pool, ref); - *(const char **)apr_array_push(t) = "\">referring page</a> " - "about the broken link.\n"; - } - - - /* Pass our apr_table_t to http_protocol.c (see mod_negotiation): */ - apr_table_setn(notes, "variant-list", apr_array_pstrcat(p, t, 0)); - - apr_table_mergen(r->subprocess_env, "VARIANTS", - apr_array_pstrcat(p, v, ',')); - - apr_pool_destroy(sub_pool); - - ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, - ref ? "Spelling fix: %s: %d candidates from %s" - : "Spelling fix: %s: %d candidates", - r->uri, candidates->nelts, ref); - - return HTTP_MULTIPLE_CHOICES; - } - } - - return OK; -} - -static void register_hooks(apr_pool_t *p) -{ - ap_hook_fixups(check_speling,NULL,NULL,APR_HOOK_LAST); -} - -module AP_MODULE_DECLARE_DATA speling_module = -{ - STANDARD20_MODULE_STUFF, - create_mconfig_for_directory, /* create per-dir config */ - NULL, /* merge per-dir config */ - create_mconfig_for_server, /* server config */ - NULL, /* merge server config */ - speling_cmds, /* command apr_table_t */ - register_hooks /* register hooks */ -}; |