X-Git-Url: http://git.jankratochvil.net/?a=blobdiff_plain;ds=sidebyside;f=tac_regexp.c;h=390c5f8e014b10401b1a92d784d44a1da8d02a78;hb=413c510553a773cd16e2b538e4a208b4c4d9f775;hp=9c0357ad56f2fdd84a269a9ff97be4dead3637ea;hpb=a296ccf128acec69a7db2312ebcc231cd18e5944;p=tac_plus.git diff --git a/tac_regexp.c b/tac_regexp.c index 9c0357a..390c5f8 100644 --- a/tac_regexp.c +++ b/tac_regexp.c @@ -1,4 +1,4 @@ -/* +/* Copyright (c) 1995-1998 by Cisco systems, Inc. Permission to use, copy, modify, and distribute this software for @@ -42,9 +42,23 @@ * precedence is structured in regular expressions. Serious changes in * regular-expression syntax might require a total rethink. */ + + +#include "tac_plus.h" + +#ifdef WITH_INCLUDED_REGEX + #include -#include "regexp.h" -#include "regmagic.h" +#include +#include <stdlib.h> /* malloc() can be found in <stdlib.h> OR <malloc.h> */ +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif + +#include "tac_regexp.h" +#include "tac_regmagic.h" +#include "report.h" /* for regerror() */ + /* * The "internal use only" fields in regexp.h are present to pass info from @@ -149,7 +163,7 @@ #define UCHARAT(p) ((int)*(p)&CHARBITS) #endif -#define FAIL(m) { regerror(m); return(NULL); } +#define FAIL(m) { tac_regerror(m); return(NULL); } #define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') #define META "^$.[()|?+*\\" @@ -164,7 +178,7 @@ /* * Global work variables for regcomp(). */ -static char *regparse; /* Input-scan pointer. */ +static const char *regparse; /* Input-scan pointer. */ static int regnpar; /* () count. */ static char regdummy; static char *regcode; /* Code-emit pointer; &regdummy = don't. */ @@ -173,23 +187,24 @@ static long regsize; /* Code size. */ /* * Forward declarations for regcomp()'s friends. 
*/ -#ifndef STATIC -#define STATIC static -#endif -STATIC char *reg(); -STATIC char *regbranch(); -STATIC char *regpiece(); -STATIC char *regatom(); -STATIC char *regnode(); -STATIC char *regnext(); -STATIC void regc(); -STATIC void reginsert(); -STATIC void regtail(); -STATIC void regoptail(); -#ifdef STRCSPN -STATIC int strcspn(); +static char *reg TAC_ARGS((int paren, int *flagp)); +static char *regbranch TAC_ARGS((int *flagp)); +static char *regpiece TAC_ARGS((int *flagp)); +static char *regatom TAC_ARGS((int *flagp)); +static char *regnode TAC_ARGS((int op)); +static void regc TAC_ARGS((int b)); +static void reginsert TAC_ARGS((int op, char *opnd)); +static void regtail TAC_ARGS((char *p, char *val)); +static void regoptail TAC_ARGS((char *p, char *val)); +static int regtry TAC_ARGS((tac_regexp *prog, const char *string)); +static int regmatch TAC_ARGS((char *prog)); +static int regrepeat TAC_ARGS((char *p)); +static char *regnext TAC_ARGS((register char *p)); +#ifndef HAVE_STRCSPN +static int strcspn TAC_ARGS((char *s1, char *s2)); #endif + /* - regcomp - compile a regular expression into internal code * @@ -205,16 +220,18 @@ STATIC int strcspn(); * Beware that the optimization-preparation code in here knows about some * of the structure of the compiled regexp. */ -regexp * -regcomp(exp) -char *exp; + +tac_regexp *tac_regcomp TAC_ARGS((const char *exp)); + +tac_regexp * +tac_regcomp(exp) +const char *exp; { - register regexp *r; + register tac_regexp *r; register char *scan; register char *longest; register int len; int flags; - extern char *malloc(); if (exp == NULL) FAIL("NULL argument"); @@ -233,7 +250,7 @@ char *exp; FAIL("regexp too big"); /* Allocate space. 
*/ - r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize); + r = (tac_regexp *)malloc(sizeof(tac_regexp) + (unsigned)regsize); if (r == NULL) FAIL("out of space"); @@ -272,7 +289,7 @@ char *exp; longest = NULL; len = 0; for (; scan != NULL; scan = regnext(scan)) - if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { + if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= (unsigned)len) { longest = OPERAND(scan); len = strlen(OPERAND(scan)); } @@ -293,6 +310,9 @@ char *exp; * is a trifle forced, but the need to tie the tails of the branches to what * follows makes it hard to avoid. */ + +static char *reg TAC_ARGS((int paren, int *flagp)); + static char * reg(paren, flagp) int paren; /* Parenthesized? */ @@ -301,7 +321,7 @@ int *flagp; register char *ret; register char *br; register char *ender; - register int parno; + register int parno = 0 /* GCC paranoia */; int flags; *flagp = HASWIDTH; /* Tentatively. */ @@ -339,7 +359,7 @@ int *flagp; } /* Make a closing node, and hook it on the end. */ - ender = regnode((paren) ? CLOSE+parno : END); + ender = regnode((paren) ? CLOSE+parno : END); regtail(ret, ender); /* Hook the tails of the branches to the closing node. */ @@ -365,6 +385,9 @@ int *flagp; * * Implements the concatenation operator. */ + +static char *regbranch TAC_ARGS((int *flagp)); + static char * regbranch(flagp) int *flagp; @@ -404,6 +427,9 @@ int *flagp; * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ + +static char *regpiece TAC_ARGS((int *flagp)); + static char * regpiece(flagp) int *flagp; @@ -468,6 +494,9 @@ int *flagp; * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. 
*/ + +static char *regatom TAC_ARGS((int *flagp)); + static char * regatom(flagp) int *flagp; @@ -577,9 +606,12 @@ int *flagp; /* - regnode - emit a node */ + +static char *regnode TAC_ARGS((int op)); + static char * /* Location. */ regnode(op) -char op; +int op; /* promoted "char" type */ { register char *ret; register char *ptr; @@ -602,9 +634,12 @@ char op; /* - regc - emit (if appropriate) a byte of code */ + +static void regc TAC_ARGS((int b)); + static void regc(b) -char b; +int b; /* promoted "char" type */ { if (regcode != &regdummy) *regcode++ = b; @@ -617,9 +652,12 @@ char b; * * Means relocating the operand. */ + +static void reginsert TAC_ARGS((int op, char *opnd)); + static void reginsert(op, opnd) -char op; +int op; /* promoted "char" type */ char *opnd; { register char *src; @@ -646,6 +684,9 @@ char *opnd; /* - regtail - set the next-pointer at the end of a node chain */ + +static void regtail TAC_ARGS((char *p, char *val)); + static void regtail(p, val) char *p; @@ -678,6 +719,9 @@ char *val; /* - regoptail - regtail on operand of first argument; nop if operandless */ + +static void regoptail TAC_ARGS((char *p, char *val)); + static void regoptail(p, val) char *p; @@ -696,44 +740,40 @@ char *val; /* * Global work variables for regexec(). */ -static char *reginput; /* String-input pointer. */ -static char *regbol; /* Beginning of input, for ^ check. */ -static char **regstartp; /* Pointer to startp array. */ -static char **regendp; /* Ditto for endp. */ +static const char *reginput; /* String-input pointer. */ +static const char *regbol; /* Beginning of input, for ^ check. */ +static const char **regstartp; /* Pointer to startp array. */ +static const char **regendp; /* Ditto for endp. */ -/* - * Forwards. 
- */ -STATIC int regtry(); -STATIC int regmatch(); -STATIC int regrepeat(); #ifdef DEBUG int regnarrate = 0; -void regdump(); -STATIC char *regprop(); +static char *regprop TAC_ARGS((char *op)); +static void regdump TAC_ARGS((tac_regexp *r)); #endif /* - regexec - match a regexp against a string */ + +int tac_regexec TAC_ARGS((register tac_regexp *prog, register const char *string)); + int -regexec(prog, string) -register regexp *prog; -register char *string; +tac_regexec(prog, string) +register tac_regexp *prog; +register const char *string; { - register char *s; - extern char *strchr(); + register const char *s; /* Be paranoid... */ if (prog == NULL || string == NULL) { - regerror("NULL parameter"); + tac_regerror("NULL parameter"); return(0); } /* Check validity of program. */ if (UCHARAT(prog->program) != MAGIC) { - regerror("corrupted program"); + tac_regerror("corrupted program"); return(0); } @@ -779,14 +819,17 @@ register char *string; /* - regtry - try match at specific point */ + +static int regtry TAC_ARGS((tac_regexp *prog, const char *string)); + static int /* 0 failure, 1 success */ regtry(prog, string) -regexp *prog; -char *string; +tac_regexp *prog; +const char *string; { register int i; - register char **sp; - register char **ep; + register const char **sp; + register const char **ep; reginput = string; regstartp = prog->startp; @@ -816,13 +859,15 @@ char *string; * need to know whether the rest of the match failed) by a loop instead of * by recursion. */ + +static int regmatch TAC_ARGS((char *prog)); + static int /* 0 failure, 1 success */ regmatch(prog) char *prog; { register char *scan; /* Current node. */ char *next; /* Next node. 
*/ - extern char *strchr(); scan = prog; #ifdef DEBUG @@ -888,7 +933,7 @@ char *prog; case OPEN+8: case OPEN+9: { register int no; - register char *save; + register const char *save; no = OP(scan) - OPEN; save = reginput; @@ -916,7 +961,7 @@ char *prog; case CLOSE+8: case CLOSE+9: { register int no; - register char *save; + register const char *save; no = OP(scan) - CLOSE; save = reginput; @@ -935,7 +980,7 @@ char *prog; } break; case BRANCH: { - register char *save; + register const char *save; if (OP(next) != BRANCH) /* No choice. */ next = OPERAND(scan); /* Avoid recursion. */ @@ -956,7 +1001,7 @@ char *prog; case PLUS: { register char nextch; register int no; - register char *save; + register const char *save; register int min; /* @@ -985,7 +1030,7 @@ char *prog; return(1); /* Success! */ break; default: - regerror("memory corruption"); + tac_regerror("memory corruption"); return(0); break; } @@ -997,21 +1042,23 @@ char *prog; * We get here only if there's trouble -- normally "case END" is * the terminating point. */ - regerror("corrupted pointers"); + tac_regerror("corrupted pointers"); return(0); } /* - regrepeat - repeatedly match something simple, report how many */ + +static int regrepeat TAC_ARGS((char *p)); + static int regrepeat(p) char *p; { register int count = 0; - register char *scan; + register const char *scan; register char *opnd; - extern char *strchr(); scan = reginput; opnd = OPERAND(p); @@ -1039,7 +1086,7 @@ char *p; } break; default: /* Oh dear. Called inappropriately. */ - regerror("internal foulup"); + tac_regerror("internal foulup"); count = 0; /* Best compromise. 
*/ break; } @@ -1051,6 +1098,9 @@ char *p; /* - regnext - dig the "next" pointer out of a node */ + +static char *regnext TAC_ARGS((register char *p)); + static char * regnext(p) register char *p; @@ -1072,19 +1122,19 @@ register char *p; #ifdef DEBUG -STATIC char *regprop(); - /* - regdump - dump a regexp onto stdout in vaguely comprehensible form */ -void + +static void regdump TAC_ARGS((tac_regexp *r)); + +static void regdump(r) -regexp *r; +tac_regexp *r; { register char *s; register char op = EXACTLY; /* Arbitrary non-END op. */ register char *next; - extern char *strchr(); s = r->program + 1; @@ -1094,7 +1144,7 @@ regexp *r; next = regnext(s); if (next == NULL) /* Next ptr. */ printf("(0)"); - else + else printf("(%d)", (s-r->program)+(next-s)); s += 3; if (op == ANYOF || op == ANYBUT || op == EXACTLY) { @@ -1121,6 +1171,9 @@ regexp *r; /* - regprop - printable representation of opcode */ + +static char *regprop TAC_ARGS((char *op)); + static char * regprop(op) char *op; @@ -1192,7 +1245,7 @@ char *op; p = "PLUS"; break; default: - regerror("corrupted opcode"); + tac_regerror("corrupted opcode"); break; } if (p != NULL) @@ -1207,12 +1260,14 @@ char *op; * about it; at least one public-domain implementation of those (highly * useful) string routines has been published on Usenet. */ -#ifdef STRCSPN +#ifndef HAVE_STRCSPN /* * strcspn - find length of initial segment of s1 consisting entirely * of characters not from s2 */ +static int strcspn TAC_ARGS((char *s1, char *s2)); + static int strcspn(s1, s2) char *s1; @@ -1231,4 +1286,10 @@ char *s2; } return(count); } -#endif +#endif /* HAVE_STRCSPN */ + +#else /* WITH_INCLUDED_REGEX */ + +TAC_SOURCEFILE_EMPTY + +#endif /* WITH_INCLUDED_REGEX */