1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 ****************************************************************/
36 /* This file provides the interface between the main body of
37 * awk and the pattern matching package. It preprocesses
38 * patterns prior to compilation to provide awk-like semantics
39 * to character sequences not supported by the pattern package.
40 * The following conversions are performed:
47 * "\xdddd" -> "\z" where 'z' is the UTF sequence
49 * "\ddd" -> "\o" where 'o' is a char octal value
50 * "\b" -> "\B" where 'B' is backspace
51 * "\t" -> "\T" where 'T' is tab
52 * "\f" -> "\F" where 'F' is form feed
53 * "\n" -> "\N" where 'N' is newline
54 * "\r" -> "\C" where 'C' is carriage return
59 static char re[MAXRE]; /* copy buffer */
62 int patlen; /* number of chars in pattern */
64 #define NPATS 20 /* number of slots in pattern cache */
66 static struct pat_list /* dynamic pattern cache */
73 static int npats; /* cache fill level */
75 /* Compile a pattern */
83 if (!compile_time) { /* search cache for dynamic pattern */
84 for (i = 0; i < npats; i++)
85 if (!strcmp(pat, pattern[i].re)) {
87 return((void *) pattern[i].program);
90 /* Preprocess Pattern for compilation */
96 quoted(&s, &p, re+MAXRE);
99 else if (!inclass && c == '(' && *s == ')') {
100 if (p < re+MAXRE-2) { /* '()' -> '[]*' */
108 else if (c == '['){ /* '[-' -> '[\-' */
111 if (p < re+MAXRE-2) {
117 } /* '[^-' -> '[^\-'*/
118 else if (*s == '^' && s[1] == '-'){
119 if (p < re+MAXRE-3) {
127 else if (*s == '['){ /* skip '[[' */
133 else if (*s == '^' && s[1] == '[') { /* skip '[^['*/
134 if (p < re+MAXRE-2) {
141 else if (*s == ']') { /* '[]' -> '[]*' */
142 if (p < re+MAXRE-2) {
151 else if (c == '-' && *s == ']') { /* '-]' -> '\-]' */
163 program = regcomp(re); /* compile pattern */
165 if (npats < NPATS) /* Room in cache */
167 else { /* Throw out least used */
168 int use = pattern[0].use;
170 for (j = 1; j < NPATS; j++) {
171 if (pattern[j].use < use) {
172 use = pattern[j].use;
176 xfree(pattern[i].program);
177 xfree(pattern[i].re);
179 pattern[i].re = tostring(pat);
180 pattern[i].program = program;
183 return((void *) program);
186 /* T/F match indication - matched string not exported */
188 match(void *p, char *s, char *start)
190 return regexec((Reprog *) p, (char *) s, 0, 0);
193 /* match and delimit the matched string */
195 pmatch(void *p, char *s, char *start)
201 if (regexec((Reprog *) p, (char *) s, &m, 1)) {
203 patlen = m.e.ep-m.s.sp;
211 /* perform a non-empty match */
213 nematch(void *p, char *s, char *start)
215 if (pmatch(p, s, start) == 1 && patlen > 0)
221 /* in the parsing of regular expressions, metacharacters like . have */
222 /* to be seen literally; \056 is not a metacharacter. */
225 hexstr(char **pp) /* find and eval hex string at pp, return new p */
231 for (i = 0, c = (*pp)[i]; i < 4 && isxdigit(c); i++, c = (*pp)[i]) {
233 n = 16 * n + c - '0';
234 else if ('a' <= c && c <= 'f')
235 n = 16 * n + c - 'a' + 10;
236 else if ('A' <= c && c <= 'F')
237 n = 16 * n + c - 'A' + 10;
243 /* look for awk-specific escape sequences */
245 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
248 quoted(char **s, char **to, char *end) /* handle escaped sequence */
271 if (t < end-1) /* all else must be escaped */
273 if (c == 'x') { /* hexadecimal goo follows */
275 if (t < end-MB_CUR_MAX)
281 } else if (isoctdigit(c)) { /* \d \dd \ddd */
283 if (isoctdigit(*p)) {
284 c = 8 * c + *p++ - '0';
286 c = 8 * c + *p++ - '0';
296 /* count rune positions */
298 countposn(char *s, int n)
303 for (i = 0, end = s+n; *s && s < end; i++){
312 /* pattern package error handler */
323 FATAL("%s", "regular expression too big");