1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
12 /* Use the internal info for displaying the results of pcre_study(). */
16 /* It is possible to compile this test program without including support for
17 testing the POSIX interface, though this is not available via the standard
21 #include "pcreposix.h"
24 #ifndef CLOCKS_PER_SEC
26 #define CLOCKS_PER_SEC CLK_TCK
28 #define CLOCKS_PER_SEC 100
32 #define LOOPREPEAT 20000
36 static int log_store = 0;
37 static size_t gotten_store;
41 static int utf8_table1[] = { /* [i] = largest value that fits in i+1 UTF-8 bytes */
42 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
44 static int utf8_table2[] = { /* [i] = marker bits ORed into the first byte of an (i+1)-byte sequence */
45 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
47 static int utf8_table3[] = { /* [i] = mask for the data bits of the first byte when i more bytes follow */
48 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
51 /*************************************************
52 * Convert character value to UTF-8 *
53 *************************************************/
55 /* This function takes an integer value in the range 0 - 0x7fffffff
56 and encodes it as a UTF-8 character in 0 to 6 bytes.
59 cvalue the character value
60 buffer pointer to buffer for result - at least 6 bytes long
62 Returns: number of characters placed in the buffer
63 -1 if input character is negative
64 0 if input character is positive but too big (only when
65 int is longer than 32 bits)
69 ord2utf8(int cvalue, unsigned char *buffer)
72 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
73 if (cvalue <= utf8_table1[i]) break;
74 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
75 if (cvalue < 0) return -1;
78 for (j = i; j > 0; j--)
80 *buffer-- = 0x80 | (cvalue & 0x3f);
83 *buffer = utf8_table2[i] | cvalue;
88 /*************************************************
89 * Convert UTF-8 string to value *
90 *************************************************/
92 /* This function takes one or more bytes that represent a UTF-8 character,
93 and returns the value of the character.
96 buffer a pointer to the byte vector
97 vptr a pointer to an int to receive the value
99 Returns: > 0 => the number of bytes consumed
100 -6 to 0 => malformed UTF-8 character at offset = (-return)
104 utf82ord(unsigned char *buffer, int *vptr)
110 for (i = -1; i < 6; i++) /* i is number of additional bytes */
112 if ((d & 0x80) == 0) break;
116 if (i == -1) { *vptr = c; return 1; } /* ascii character */
117 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
119 /* i now has a value in the range 1-5 */
122 d = (c & utf8_table3[i]) << s;
124 for (j = 0; j < i; j++)
127 if ((c & 0xc0) != 0x80) return -(j+1);
129 d |= (c & 0x3f) << s;
132 /* Check that encoding was the correct unique one */
134 for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
135 if (d <= utf8_table1[j]) break;
136 if (j != i) return -(i+1);
149 /* Debugging function to print the internal form of the regex. This is the same
150 code as contained in pcre.c under the DEBUG macro. */
152 static const char *OP_names[] = {
153 "End", "\\A", "\\B", "\\b", "\\D", "\\d",
154 "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
155 "Opt", "^", "$", "Any", "chars", "not",
156 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
157 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
158 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
159 "*", "*?", "+", "+?", "?", "??", "{", "{",
160 "class", "Ref", "Recurse",
161 "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
162 "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
163 "Brazero", "Braminzero", "Branumber", "Bra"
167 static void print_internals(pcre *re)
169 unsigned char *code = ((real_pcre *)re)->code;
171 fprintf(outfile, "------------------------------------------------------------------\n");
178 fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
182 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
183 fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
185 fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
192 fprintf(outfile, " %s\n", OP_names[*code]);
193 fprintf(outfile, "------------------------------------------------------------------\n");
197 fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
202 charlength = *(++code);
203 fprintf(outfile, "%3d ", charlength);
204 while (charlength-- > 0)
205 if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
206 else fprintf(outfile, "\\x%02x", c);
216 case OP_ASSERTBACK_NOT:
222 fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
237 case OP_TYPEMINQUERY:
238 if (*code >= OP_TYPESTAR)
239 fprintf(outfile, " %s", OP_names[code[1]]);
240 else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
241 else fprintf(outfile, " \\x%02x", c);
242 fprintf(outfile, "%s", OP_names[*code++]);
248 if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
249 else fprintf(outfile, " \\x%02x{", c);
250 if (*code != OP_EXACT) fprintf(outfile, ",");
251 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
252 if (*code == OP_MINUPTO) fprintf(outfile, "?");
259 fprintf(outfile, " %s{", OP_names[code[3]]);
260 if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
261 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
262 if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
267 if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
268 else fprintf(outfile, " [^\\x%02x]", c);
277 if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
278 else fprintf(outfile, " [^\\x%02x]", c);
279 fprintf(outfile, "%s", OP_names[*code++]);
285 if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
286 else fprintf(outfile, " [^\\x%02x]{", c);
287 if (*code != OP_NOTEXACT) fprintf(outfile, ",");
288 fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
289 if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
294 fprintf(outfile, " \\%d", (code[1] << 8) | code[2]);
296 goto CLASS_REF_REPEAT;
302 fprintf(outfile, " [");
304 for (i = 0; i < 256; i++)
306 if ((code[i/8] & (1 << (i&7))) != 0)
309 for (j = i+1; j < 256; j++)
310 if ((code[j/8] & (1 << (j&7))) == 0) break;
311 if (i == '-' || i == ']') fprintf(outfile, "\\");
312 if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
315 fprintf(outfile, "-");
316 if (j == '-' || j == ']') fprintf(outfile, "\\");
317 if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
322 fprintf(outfile, "]");
335 fprintf(outfile, "%s", OP_names[*code]);
340 min = (code[1] << 8) + code[2];
341 max = (code[3] << 8) + code[4];
342 if (max == 0) fprintf(outfile, "{%d,}", min);
343 else fprintf(outfile, "{%d,%d}", min, max);
344 if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
354 /* Anything else is just a one-node item */
357 fprintf(outfile, " %s", OP_names[*code]);
362 fprintf(outfile, "\n");
368 /* Character string printing function. A "normal" and a UTF-8 version. */
370 static void pchars(unsigned char *p, int length, int utf8)
377 int rc = utf82ord(p, &c);
382 if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
383 else fprintf(outfile, "\\x{%02x}", c);
388 /* Not UTF-8, or malformed UTF-8 */
390 if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
391 else fprintf(outfile, "\\x%02x", c);
397 /* Alternative malloc function, to test functionality and show the size of the
400 static void *new_malloc(size_t size)
404 fprintf(outfile, "Memory allocation (code space): %d\n",
405 (int)((int)size - offsetof(real_pcre, code[0])));
412 /* Get one piece of information from the pcre_fullinfo() function */
414 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
417 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
418 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
424 /* Read lines from named file or stdin and write to named file or stdout; lines
425 consist of a regular expression, in delimiters and optionally followed by
426 options, followed by a set of test data, terminated by an empty line. */
428 int main(int argc, char **argv)
430 FILE *infile = stdin;
432 int study_options = 0;
437 int size_offsets = 45;
438 int size_offsets_max;
445 unsigned char buffer[30000];
446 unsigned char dbuffer[1024];
448 /* Static so that new_malloc can use it. */
454 while (argc > 1 && argv[op][0] == '-')
458 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
460 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
461 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
462 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
463 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
464 ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
470 else if (strcmp(argv[op], "-p") == 0) posix = 1;
474 printf("** Unknown or malformed option %s\n", argv[op]);
475 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
476 printf(" -d debug: show compiled code; implies -i\n"
477 " -i show information about compiled pattern\n"
478 " -o <n> set size of offsets vector to <n>\n");
480 printf(" -p use POSIX interface\n");
482 printf(" -s output store information\n"
483 " -t time compilation and execution\n");
490 /* Get the store for the offsets vector, and remember what it was */
492 size_offsets_max = size_offsets;
493 offsets = malloc(size_offsets_max * sizeof(int));
496 printf("** Failed to get %d bytes of memory for offsets vector\n",
497 size_offsets_max * sizeof(int));
501 /* Sort out the input and output files */
505 infile = fopen(argv[op], "r");
508 printf("** Failed to open %s\n", argv[op]);
515 outfile = fopen(argv[op+1], "w");
518 printf("** Failed to open %s\n", argv[op+1]);
523 /* Set alternative malloc function */
525 pcre_malloc = new_malloc;
527 /* Heading line, then prompt for first regex if stdin */
529 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
536 pcre_extra *extra = NULL;
538 #if !defined NOPOSIX /* There are still compilers that require no indent */
544 unsigned char *p, *pp, *ppp;
545 const unsigned char *tables = NULL;
547 int do_debug = debug;
550 int do_showinfo = showinfo;
553 int erroroffset, len, delimiter;
555 if (infile == stdin) printf(" re> ");
556 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
557 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
560 while (isspace(*p)) p++;
561 if (*p == 0) continue;
563 /* Get the delimiter and seek the end of the pattern; if it isn't
564 complete, read more. */
568 if (isalnum(delimiter) || delimiter == '\\')
570 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
580 if (*pp == '\\' && pp[1] != 0) pp++;
581 else if (*pp == delimiter) break;
586 len = sizeof(buffer) - (pp - buffer);
589 fprintf(outfile, "** Expression too long - missing delimiter?\n");
593 if (infile == stdin) printf(" > ");
594 if (fgets((char *)pp, len, infile) == NULL)
596 fprintf(outfile, "** Unexpected EOF\n");
600 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
603 /* If the first character after the delimiter is backslash, make
604 the pattern end with backslash. This is purely to provide a way
605 of testing for the error message when a pattern ends with backslash. */
607 if (pp[1] == '\\') *pp++ = '\\';
609 /* Terminate the pattern at the delimiter */
613 /* Look for options after final delimiter */
617 log_store = showstore; /* default from command line */
623 case 'g': do_g = 1; break;
624 case 'i': options |= PCRE_CASELESS; break;
625 case 'm': options |= PCRE_MULTILINE; break;
626 case 's': options |= PCRE_DOTALL; break;
627 case 'x': options |= PCRE_EXTENDED; break;
629 case '+': do_showrest = 1; break;
630 case 'A': options |= PCRE_ANCHORED; break;
631 case 'D': do_debug = do_showinfo = 1; break;
632 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
633 case 'G': do_G = 1; break;
634 case 'I': do_showinfo = 1; break;
635 case 'M': log_store = 1; break;
638 case 'P': do_posix = 1; break;
641 case 'S': do_study = 1; break;
642 case 'U': options |= PCRE_UNGREEDY; break;
643 case 'X': options |= PCRE_EXTRA; break;
644 case '8': options |= PCRE_UTF8; utf8 = 1; break;
648 while (*ppp != '\n' && *ppp != ' ') ppp++;
650 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
652 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
655 tables = pcre_maketables();
659 case '\n': case ' ': break;
661 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
666 /* Handle compiling via the POSIX interface, which doesn't support the
667 timing, showing, or debugging options, nor the ability to pass over
668 local character tables. */
671 if (posix || do_posix)
675 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
676 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
677 rc = regcomp(&preg, (char *)p, cflags);
679 /* Compilation failed; go back for another re, skipping to blank line
680 if non-interactive. */
684 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
685 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
690 /* Handle compiling via the native interface */
693 #endif /* !defined NOPOSIX */
700 clock_t start_time = clock();
701 for (i = 0; i < LOOPREPEAT; i++)
703 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
704 if (re != NULL) free(re);
706 time_taken = clock() - start_time;
707 fprintf(outfile, "Compile time %.3f milliseconds\n",
708 ((double)time_taken * 1000.0) /
709 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
712 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
714 /* Compilation failed; go back for another re, skipping to blank line
715 if non-interactive. */
719 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
725 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
730 len = (int)strlen((char *)buffer);
731 while (len > 0 && isspace(buffer[len-1])) len--;
734 fprintf(outfile, "\n");
739 /* Compilation succeeded; print data if required. There are now two
740 info-returning functions. The old one has a limited interface and
741 returns only limited data. Check that it agrees with the newer one. */
745 unsigned long int get_options;
746 int old_first_char, old_options, old_count;
747 int count, backrefmax, first_char, need_char;
750 if (do_debug) print_internals(re);
752 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
753 new_info(re, NULL, PCRE_INFO_SIZE, &size);
754 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
755 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
756 new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
757 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
759 old_count = pcre_info(re, &old_options, &old_first_char);
760 if (count < 0) fprintf(outfile,
761 "Error %d from pcre_info()\n", count);
764 if (old_count != count) fprintf(outfile,
765 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
768 if (old_first_char != first_char) fprintf(outfile,
769 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
770 first_char, old_first_char);
772 if (old_options != (int)get_options) fprintf(outfile,
773 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
774 get_options, old_options);
777 if (size != gotten_store) fprintf(outfile,
778 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
781 fprintf(outfile, "Capturing subpattern count = %d\n", count);
783 fprintf(outfile, "Max back reference = %d\n", backrefmax);
784 if (get_options == 0) fprintf(outfile, "No options\n");
785 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
786 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
787 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
788 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
789 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
790 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
791 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
792 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
793 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
794 ((get_options & PCRE_UTF8) != 0)? " utf8" : "");
796 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
797 fprintf(outfile, "Case state changes\n");
799 if (first_char == -1)
801 fprintf(outfile, "First char at start or follows \\n\n");
803 else if (first_char < 0)
805 fprintf(outfile, "No first char\n");
809 if (isprint(first_char))
810 fprintf(outfile, "First char = \'%c\'\n", first_char);
812 fprintf(outfile, "First char = %d\n", first_char);
817 fprintf(outfile, "No need char\n");
821 if (isprint(need_char))
822 fprintf(outfile, "Need char = \'%c\'\n", need_char);
824 fprintf(outfile, "Need char = %d\n", need_char);
828 /* If /S was present, study the regexp to generate additional info to
829 help with the matching. */
837 clock_t start_time = clock();
838 for (i = 0; i < LOOPREPEAT; i++)
839 extra = pcre_study(re, study_options, &error);
840 time_taken = clock() - start_time;
841 if (extra != NULL) free(extra);
842 fprintf(outfile, " Study time %.3f milliseconds\n",
843 ((double)time_taken * 1000.0)/
844 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
847 extra = pcre_study(re, study_options, &error);
849 fprintf(outfile, "Failed to study: %s\n", error);
850 else if (extra == NULL)
851 fprintf(outfile, "Study returned NULL\n");
853 else if (do_showinfo)
855 uschar *start_bits = NULL;
856 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
857 if (start_bits == NULL)
858 fprintf(outfile, "No starting character set\n");
863 fprintf(outfile, "Starting character set: ");
864 for (i = 0; i < 256; i++)
866 if ((start_bits[i/8] & (1<<(i%8))) != 0)
870 fprintf(outfile, "\n ");
873 if (isprint(i) && i != ' ')
875 fprintf(outfile, "%c ", i);
880 fprintf(outfile, "\\x%02x ", i);
885 fprintf(outfile, "\n");
891 /* Read data lines and test them */
896 unsigned char *bptr = dbuffer;
897 int *use_offsets = offsets;
898 int use_size_offsets = size_offsets;
904 int start_offset = 0;
909 if (infile == stdin) printf("data> ");
910 if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
915 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
917 len = (int)strlen((char *)buffer);
918 while (len > 0 && isspace(buffer[len-1])) len--;
923 while (isspace(*p)) p++;
926 while ((c = *p++) != 0)
930 if (c == '\\') switch ((c = *p++))
932 case 'a': c = 7; break;
933 case 'b': c = '\b'; break;
934 case 'e': c = 27; break;
935 case 'f': c = '\f'; break;
936 case 'n': c = '\n'; break;
937 case 'r': c = '\r'; break;
938 case 't': c = '\t'; break;
939 case 'v': c = '\v'; break;
941 case '0': case '1': case '2': case '3':
942 case '4': case '5': case '6': case '7':
944 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
945 c = c * 8 + *p++ - '0';
950 /* Handle \x{..} specially - new Perl thing for utf8 */
954 unsigned char *pt = p;
956 while (isxdigit(*(++pt)))
957 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
960 unsigned char buffer[8];
962 utn = ord2utf8(c, buffer);
963 for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
964 c = buffer[ii]; /* Last byte */
968 /* Not correct form; fall through */
974 while (i++ < 2 && isxdigit(*p))
976 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
981 case 0: /* Allows for an empty line */
985 case 'A': /* Option setting */
986 options |= PCRE_ANCHORED;
990 options |= PCRE_NOTBOL;
994 while(isdigit(*p)) n = n * 10 + *p++ - '0';
995 copystrings |= 1 << n;
999 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1000 getstrings |= 1 << n;
1008 options |= PCRE_NOTEMPTY;
1012 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1013 if (n > size_offsets_max)
1015 size_offsets_max = n;
1017 use_offsets = offsets = malloc(size_offsets_max * sizeof(int));
1018 if (offsets == NULL)
1020 printf("** Failed to get %d bytes of memory for offsets vector\n",
1021 size_offsets_max * sizeof(int));
1025 use_size_offsets = n;
1026 if (n == 0) use_offsets = NULL;
1030 options |= PCRE_NOTEOL;
1038 /* Handle matching via the POSIX interface, which does not
1041 #if !defined NOPOSIX
1042 if (posix || do_posix)
1046 regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);
1047 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1048 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1050 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1054 (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));
1055 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1060 for (i = 0; i < use_size_offsets; i++)
1062 if (pmatch[i].rm_so >= 0)
1064 fprintf(outfile, "%2d: ", (int)i);
1065 pchars(dbuffer + pmatch[i].rm_so,
1066 pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1067 fprintf(outfile, "\n");
1068 if (i == 0 && do_showrest)
1070 fprintf(outfile, " 0+ ");
1071 pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1072 fprintf(outfile, "\n");
1080 /* Handle matching via the native interface - repeats for /g and /G */
1083 #endif /* !defined NOPOSIX */
1085 for (;; gmatched++) /* Loop for /g or /G */
1091 clock_t start_time = clock();
1092 for (i = 0; i < LOOPREPEAT; i++)
1093 count = pcre_exec(re, extra, (char *)bptr, len,
1094 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1095 time_taken = clock() - start_time;
1096 fprintf(outfile, "Execute time %.3f milliseconds\n",
1097 ((double)time_taken * 1000.0)/
1098 ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1101 count = pcre_exec(re, extra, (char *)bptr, len,
1102 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1106 fprintf(outfile, "Matched, but too many substrings\n");
1107 count = use_size_offsets/3;
1115 for (i = 0; i < count * 2; i += 2)
1117 if (use_offsets[i] < 0)
1118 fprintf(outfile, "%2d: <unset>\n", i/2);
1121 fprintf(outfile, "%2d: ", i/2);
1122 pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);
1123 fprintf(outfile, "\n");
1128 fprintf(outfile, " 0+ ");
1129 pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);
1130 fprintf(outfile, "\n");
1136 for (i = 0; i < 32; i++)
1138 if ((copystrings & (1 << i)) != 0)
1140 char copybuffer[16];
1141 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1142 i, copybuffer, sizeof(copybuffer));
1144 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1146 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1150 for (i = 0; i < 32; i++)
1152 if ((getstrings & (1 << i)) != 0)
1154 const char *substring;
1155 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1158 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1161 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1162 /* free((void *)substring); */
1163 pcre_free_substring(substring);
1170 const char **stringlist;
1171 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1174 fprintf(outfile, "get substring list failed %d\n", rc);
1177 for (i = 0; i < count; i++)
1178 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1179 if (stringlist[i] != NULL)
1180 fprintf(outfile, "string list not terminated by NULL\n");
1181 /* free((void *)stringlist); */
1182 pcre_free_substring_list(stringlist);
1187 /* Failed to match. If this is a /g or /G loop and we previously set
1188 g_notempty after a null match, this is not necessarily the end.
1189 We want to advance the start offset, and continue. Fudge the offset
1190 values to achieve this. We won't be at the end of the string - that
1191 was checked before setting g_notempty. */
1195 if (g_notempty != 0)
1197 use_offsets[0] = start_offset;
1198 use_offsets[1] = start_offset + 1;
1202 if (gmatched == 0) /* Error if no previous matches */
1204 if (count == -1) fprintf(outfile, "No match\n");
1205 else fprintf(outfile, "Error %d\n", count);
1207 break; /* Out of the /g loop */
1211 /* If not /g or /G we are done */
1213 if (!do_g && !do_G) break;
1215 /* If we have matched an empty string, first check to see if we are at
1216 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1217 what Perl's /g option does. This turns out to be rather cunning. First
1218 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1219 same point. If this fails (picked up above) we advance to the next
1223 if (use_offsets[0] == use_offsets[1])
1225 if (use_offsets[0] == len) break;
1226 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1229 /* For /g, update the start offset, leaving the rest alone */
1231 if (do_g) start_offset = use_offsets[1];
1233 /* For /G, update the pointer and length */
1237 bptr += use_offsets[1];
1238 len -= use_offsets[1];
1240 } /* End of loop for /g and /G */
1241 } /* End of loop for data lines */
1245 #if !defined NOPOSIX
1246 if (posix || do_posix) regfree(&preg);
1249 if (re != NULL) free(re);
1250 if (extra != NULL) free(extra);
1253 free((void *)tables);
1254 setlocale(LC_CTYPE, "C");
1258 fprintf(outfile, "\n");