php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
pcre2_convert.c
Go to the documentation of this file.
1/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2022 University of Cambridge
11
12-----------------------------------------------------------------------------
13Redistribution and use in source and binary forms, with or without
14modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37POSSIBILITY OF SUCH DAMAGE.
38-----------------------------------------------------------------------------
39*/
40
41
42#ifdef HAVE_CONFIG_H
43#include "config.h"
44#endif
45
46#include "pcre2_internal.h"
47
/* Option bits that select the kind of input pattern. The external convert
function rejects a call unless exactly one of these bits is set. */
48#define TYPE_OPTIONS (PCRE2_CONVERT_GLOB| \
49 PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)
50
/* Every option bit the external convert function accepts; any other bit
causes PCRE2_ERROR_BADOPTION. */
51#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
52 PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
53 PCRE2_CONVERT_GLOB_NO_STARSTAR| \
54 TYPE_OPTIONS)
55
/* Size, in code units, of the on-stack buffer that is used when the caller
supplies no output buffer. */
56#define DUMMY_BUFFER_SIZE 100
57
58/* Generated pattern fragments. The note after each definition gives the text
it spells, assuming every STR_x macro is the string literal for the character
it names (TODO confirm against the header that defines them). */
59
60#define STR_BACKSLASH_A STR_BACKSLASH STR_A /* \A */
61#define STR_BACKSLASH_z STR_BACKSLASH STR_z /* \z */
62#define STR_COLON_RIGHT_SQUARE_BRACKET STR_COLON STR_RIGHT_SQUARE_BRACKET /* :] */
63#define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN /* .*(?<= */
64#define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS /* (?!\.) */
65#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS /* (?s) */
66#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS /* (*NUL) */
67
68/* States for POSIX processing */
69
72
73/* Macro to add a zero-terminated character string to the output buffer,
checking for overflow. The macro is not self-contained: it uses the variables
s, p and endp of the enclosing function, and when the buffer is full it makes
that function return PCRE2_ERROR_NOMEMORY. The expansion is a bare brace
block, so take care when using it as the body of an if that has an else. */
74
75#define PUTCHARS(string) \
76 { \
77 for (s = (char *)(string); *s != 0; s++) \
78 { \
79 if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
80 *p++ = *s; \
81 } \
82 }
83
84/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */
85
86static const char *pcre2_escaped_literals =
92
93/* Recognized escaped metacharacters in POSIX basic patterns. */
94
95static const char *posix_meta_escapes =
99
100
101
102/*************************************************
103* Convert a POSIX pattern *
104*************************************************/
105
106/* This function handles both basic and extended POSIX patterns.
107
108Arguments:
109 pattype the pattern type
110 pattern the pattern
111 plength length in code units
112 utf TRUE if UTF
113 use_buffer where to put the output
114 use_length length of use_buffer
115 bufflenptr where to put the used length
116 dummyrun TRUE if a dummy run
117 ccontext the convert context
118
119Returns: 0 => success
120 !0 => error code
121*/
122
/* Supplementary notes on convert_posix():

Output is written through p and is bounded by endp, which is set one code
unit short of the end of use_buffer so that the terminating zero always fits.
In a dummy run p is pulled back to use_buffer before every input character,
so the buffer only ever holds the current item while convlength accumulates
the total length.

*bufflenptr is first set to plength (the default error offset) and, on
success, to the converted length not counting the terminating zero.

NOTE(review): this listing omits a number of source lines (the embedded line
numbers skip). The places where that hides logic are marked below; check them
against the real file before relying on these notes. */
123static int
124convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
125 BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
126 PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
127{
128char *s; /* Cursor used by the PUTCHARS macro */
129PCRE2_SPTR posix = pattern; /* Read position in the input */
130PCRE2_UCHAR *p = use_buffer; /* Write position in the output */
131PCRE2_UCHAR *pp = p; /* Start of the item currently being output */
132PCRE2_UCHAR *endp = p + use_length - 1; /* Allow for trailing zero */
133PCRE2_SIZE convlength = 0; /* Output length accumulated so far */
134
135uint32_t bracount = 0; /* Count of open parentheses (extended mode) */
136uint32_t posix_state = POSIX_START_REGEX;
137uint32_t lastspecial = 0; /* Last special character copied to the output */
138BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
139BOOL nextisliteral = FALSE; /* Set by a backslash that makes the next character literal */
140
141(void)utf; /* Not used when Unicode not supported */
142(void)ccontext; /* Not currently used */
143
144/* Initialize default for error offset as end of input. */
145
146*bufflenptr = plength;
/* NOTE(review): source line 147 is absent from this listing. */
148
149/* Now scan the input. */
150
151while (plength > 0)
152 {
153 uint32_t c, sc;
154 int clength = 1;
155
156 /* Add in the length of the last item, then, if in the dummy run, pull the
157 pointer back to the start of the (temporary) buffer and then remember the
158 start of the next item. */
159
160 convlength += p - pp;
161 if (dummyrun) p = use_buffer;
162 pp = p;
163
164 /* Pick up the next character */
165
166#ifndef SUPPORT_UNICODE
167 c = *posix;
168#else
169 GETCHARLENTEST(c, posix, clength);
170#endif
171 posix += clength;
172 plength -= clength;
173
 /* sc is the value used to dispatch on the character outside a class. It is
 forced to zero when the previous backslash made this character literal,
 which presumably sends it to the default case of the switch below. */
174 sc = nextisliteral? 0 : c;
175 nextisliteral = FALSE;
176
177 /* Handle a character within a class. */
178
179 if (posix_state >= POSIX_CLASS_NOT_STARTED)
180 {
 /* NOTE(review): source lines 181 and 183 are absent from this listing.
 Judging by the comment on the else branch, the missing test detects the
 end of the class and the missing statement emits its terminator. */
182 {
184 posix_state = POSIX_NOT_BRACKET;
185 }
186
187 /* Not the end of the class */
188
189 else
190 {
191 switch (posix_state)
192 {
 /* NOTE(review): the case labels of this switch (source lines 193,
 206, 207 and 211) and lines 197 and 199 are absent from this listing.
 What remains tracks whether the text inside the brackets is the start
 of a nested item introduced by a colon. */
194 if (c <= 127 && islower(c)) break; /* Remain in started state */
195 posix_state = POSIX_CLASS_NOT_STARTED;
196 if (c == CHAR_COLON && plength > 0 &&
198 {
200 plength--;
201 posix++;
202 continue; /* With next character after :] */
203 }
204 /* Fall through */
205
208 posix_state = POSIX_CLASS_STARTING;
209 break;
210
212 if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
213 break;
214 }
215
 /* Copy the current character to the output unchanged. */
217 if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
218 memcpy(p, posix - clength, CU2BYTES(clength));
219 p += clength;
220 }
221 }
222
223 /* Handle a character not within a class. */
224
225 else switch(sc)
226 {
 /* NOTE(review): source lines 227 and 228 are absent from this listing;
 presumably they hold the case label for an opening square bracket and the
 statement that copies it, since the code below sets up class state. */
229
230#ifdef NEVER
231 /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
232 support) but they are not part of POSIX 1003.1. */
233
234 if (plength >= 6)
235 {
236 if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
237 posix[1] == CHAR_COLON &&
238 (posix[2] == CHAR_LESS_THAN_SIGN ||
239 posix[2] == CHAR_GREATER_THAN_SIGN) &&
240 posix[3] == CHAR_COLON &&
241 posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
242 posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
243 {
244 if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
245 memcpy(p, posix, CU2BYTES(6));
246 p += 6;
247 posix += 6;
248 plength -= 6;
249 continue; /* With next character */
250 }
251 }
252#endif
253
254 /* Handle start of "normal" character classes */
255
256 posix_state = POSIX_CLASS_NOT_STARTED;
257
258 /* Handle ^ and ] as first characters */
259
 /* NOTE(review): source lines 266 and 272 are absent from this listing;
 presumably each one emits the character that has just been consumed. */
260 if (plength > 0)
261 {
262 if (*posix == CHAR_CIRCUMFLEX_ACCENT)
263 {
264 posix++;
265 plength--;
267 }
268 if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
269 {
270 posix++;
271 plength--;
273 }
274 }
275 break;
276
 /* A backslash at the very end of the pattern is an error. In extended
 mode the next character simply becomes a literal. In basic mode a
 character listed in posix_meta_escapes is copied as a special item (with
 a backslash in front of it when it is a digit); any other character
 becomes a literal. */
277 case CHAR_BACKSLASH:
278 if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
279 if (extended) nextisliteral = TRUE; else
280 {
281 if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
282 {
283 if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
284 if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
285 lastspecial = *p++ = *posix++;
286 plength--;
287 }
288 else nextisliteral = TRUE;
289 }
290 break;
291
 /* NOTE(review): the case labels at source lines 292, 297, 301 and 303 to
 305 are absent from this listing. The visible statements keep bracount in
 step with parentheses and treat the characters concerned as special only
 in extended mode. */
293 if (!extended || bracount == 0) goto ESCAPE_LITERAL;
294 bracount--;
295 goto COPY_SPECIAL;
296
298 bracount++;
299 /* Fall through */
300
302 case CHAR_PLUS:
306 if (!extended) goto ESCAPE_LITERAL;
307 /* Fall through */
308
309 case CHAR_DOT:
310 case CHAR_DOLLAR_SIGN:
311 posix_state = POSIX_NOT_BRACKET;
 /* Copy a special character through unchanged and remember it. */
312 COPY_SPECIAL:
313 lastspecial = c;
314 if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
315 *p++ = c;
316 break;
317
 /* In basic mode an asterisk is a literal when nothing precedes it that
 it could repeat (start of the regex, after an anchor, or after an opening
 parenthesis). */
318 case CHAR_ASTERISK:
319 if (lastspecial != CHAR_ASTERISK)
320 {
321 if (!extended && (posix_state < POSIX_NOT_BRACKET ||
322 lastspecial == CHAR_LEFT_PARENTHESIS))
323 goto ESCAPE_LITERAL;
324 goto COPY_SPECIAL;
325 }
326 break; /* Ignore second and subsequent asterisks */
327
 /* NOTE(review): the case label at source line 328 is absent from this
 listing; the POSIX_ANCHORED state set below suggests it is the
 circumflex. */
329 if (extended) goto COPY_SPECIAL;
330 if (posix_state == POSIX_START_REGEX ||
331 lastspecial == CHAR_LEFT_PARENTHESIS)
332 {
333 posix_state = POSIX_ANCHORED;
334 goto COPY_SPECIAL;
335 }
336 /* Fall through */
337
 /* Any other character is a literal. NOTE(review): source line 342 is
 absent from this listing; presumably it emits the backslash for a
 character that is listed in pcre2_escaped_literals. */
338 default:
339 if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
340 {
341 ESCAPE_LITERAL:
343 }
344 lastspecial = 0xff; /* Indicates nothing special */
345 if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
346 memcpy(p, posix - clength, CU2BYTES(clength));
347 p += clength;
348 posix_state = POSIX_NOT_BRACKET;
349 break;
350 }
351 }
352
/* NOTE(review): source line 354 is absent from this listing; presumably it
returns an error because the input ended inside a bracket expression. */
353if (posix_state >= POSIX_CLASS_NOT_STARTED)
355convlength += p - pp; /* Final segment */
356*bufflenptr = convlength;
357*p++ = 0; /* Terminating zero; endp kept one unit free for it */
358return 0;
359}
360
361
362/*************************************************
363* Convert a glob pattern *
364*************************************************/
365
366/* Context for writing the output into a buffer. */
367
/* The writer functions below count every code unit in output_size even when
it does not fit, and store a unit only while output is below output_end.
out_str is a small staging area: callers fill it and then pass the number of
bytes to convert_glob_write_str(). */
368typedef struct pcre2_output_context {
369 PCRE2_UCHAR *output; /* current output position */
370 PCRE2_SPTR output_end; /* output end */
371 PCRE2_SIZE output_size; /* size of the output */
372 uint8_t out_str[8]; /* string copied to the output */
374
375
376/* Write a character into the output.
377
378Arguments:
379 out output context
380 chr the next character
381*/
382
383static void
384convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)
385{
386out->output_size++;
387
388if (out->output < out->output_end)
389 *out->output++ = chr;
390}
391
392
393/* Write a string into the output.
394
395Arguments:
396 out output context
397 length length of out->out_str
398*/
399
400static void
401convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)
402{
403uint8_t *out_str = out->out_str;
404PCRE2_UCHAR *output = out->output;
405PCRE2_SPTR output_end = out->output_end;
406PCRE2_SIZE output_size = out->output_size;
407
408do
409 {
410 output_size++;
411
412 if (output < output_end)
413 *output++ = *out_str++;
414 }
415while (--length != 0);
416
417out->output = output;
418out->output_size = output_size;
419}
420
421
422/* Prints the separator into the output.
423
424Arguments:
425 out output context
426 separator glob separator
427 with_escape backslash is needed before separator
428*/
429
430static void
431convert_glob_print_separator(pcre2_output_context *out,
432 PCRE2_UCHAR separator, BOOL with_escape)
433{
434if (with_escape)
435 convert_glob_write(out, CHAR_BACKSLASH);
436
437convert_glob_write(out, separator);
438}
439
440
441/* Prints a wildcard into the output.
442
443Arguments:
444 out output context
445 separator glob separator
446 with_escape backslash is needed before separator
447*/
448
449static void
450convert_glob_print_wildcard(pcre2_output_context *out,
451 PCRE2_UCHAR separator, BOOL with_escape)
452{
453out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
454out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
455convert_glob_write_str(out, 2);
456
457convert_glob_print_separator(out, separator, with_escape);
458
459convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
460}
461
462
463/* Parse a posix class.
464
465Arguments:
466 from starting point of scanning the range
467 pattern_end end of pattern
468 out output context
469
470Returns: >0 => class index
471 0 => malformed class
472*/
473
/* Supplementary notes on convert_glob_parse_class():

On entry *from is expected to point at the colon that follows an opening
square bracket (that is how convert_glob_parse_range() calls it), so the
class name starts one code unit later.

On success the whole item, from the opening square bracket through the
closing colon and square bracket, is copied to the output, *from is moved
past it, and the 1-based position of the name in posix_classes is returned.
On failure nothing is written and *from is left unchanged.

NOTE(review): source lines 476 and 483 are absent from this listing;
presumably they hold the third parameter (out) and the declaration of c,
both of which are used below. */
474static int
475convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
477{
/* Known class names, each terminated by a colon, in class-index order. */
478static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
479 "graph:lower:print:punct:space:upper:word:xdigit:";
480PCRE2_SPTR start = *from + 1;
481PCRE2_SPTR pattern = start;
482const char *class_ptr;
484int class_index;
485
/* Skip the run of lower case letters; c ends up as the first code unit that
is not one. Running off the end of the pattern means a malformed class. */
486while (TRUE)
487 {
488 if (pattern >= pattern_end) return 0;
489
490 c = *pattern++;
491
492 if (c < CHAR_a || c > CHAR_z) break;
493 }
494
/* The name must be followed by a colon and a closing square bracket. */
495if (c != CHAR_COLON || pattern >= pattern_end ||
496 *pattern != CHAR_RIGHT_SQUARE_BRACKET)
497 return 0;
498
499class_ptr = posix_classes;
500class_index = 1;
501
/* Compare the name against each entry of the list in turn. */
502while (TRUE)
503 {
 /* End of the list: the name is not a known class. */
504 if (*class_ptr == CHAR_NUL) return 0;
505
506 pattern = start;
507
508 while (*pattern == (PCRE2_UCHAR) *class_ptr)
509 {
 /* Matching as far as the colon means the whole name matched. */
510 if (*pattern == CHAR_COLON)
511 {
 /* Step past the colon and closing bracket, and back to the opening
 bracket, then copy everything in between. */
512 pattern += 2;
513 start -= 2;
514
515 do convert_glob_write(out, *start++); while (start < pattern);
516
517 *from = pattern;
518 return class_index;
519 }
520 pattern++;
521 class_ptr++;
522 }
523
 /* Mismatch: move on to the start of the next name in the list. */
524 while (*class_ptr != CHAR_COLON) class_ptr++;
525 class_ptr++;
526 class_index++;
527 }
528}
529
530/* Checks whether the character is in the class.
531
532Arguments:
533 class_index class index
534 c character
535
536Returns: !0 => character is found in the class
537 0 => otherwise
538*/
539
540static BOOL
541convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
542{
543#if PCRE2_CODE_UNIT_WIDTH != 8
544if (c > 0xff)
545 {
546 /* ctype functions are not sane for c > 0xff */
547 return 0;
548 }
549#endif
550
551switch (class_index)
552 {
553 case 1: return isalnum(c);
554 case 2: return isalpha(c);
555 case 3: return 1;
556 case 4: return c == CHAR_HT || c == CHAR_SPACE;
557 case 5: return iscntrl(c);
558 case 6: return isdigit(c);
559 case 7: return isgraph(c);
560 case 8: return islower(c);
561 case 9: return isprint(c);
562 case 10: return ispunct(c);
563 case 11: return isspace(c);
564 case 12: return isupper(c);
565 case 13: return isalnum(c) || c == CHAR_UNDERSCORE;
566 default: return isxdigit(c);
567 }
568}
569
570/* Parse a range of characters.

This converts one glob bracket expression into a regex character class. On
entry *from points just after the opening square bracket; on return it
points at the position where parsing stopped.

NOTE(review): this listing omits source lines 601, 612, 645, 654, 712, 718,
740 and 750. The places where that hides logic are marked in the body.
571
572Arguments:
573 from starting point of scanning the range
574 pattern_end end of pattern
575 out output context
 utf TRUE if UTF
576 separator glob separator
577 with_escape backslash is needed before separator
 escape glob escape character, or 0 for none
 no_wildsep when TRUE, no separator exclusion is added to the class
578
579Returns: 0 => success
580 !0 => error code
581*/
582
583static int
584convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
585 pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
586 BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
587{
588BOOL is_negative = FALSE;
589BOOL separator_seen = FALSE;
590BOOL has_prev_c;
591PCRE2_SPTR pattern = *from;
592PCRE2_SPTR char_start = NULL;
593uint32_t c, prev_c;
594int len, class_index;
595
596(void)utf; /* Avoid compiler warning. */
597
/* The pattern ends straight after the opening bracket. NOTE(review): source
line 601 is absent; presumably it returns an error code. */
598if (pattern >= pattern_end)
599 {
600 *from = pattern;
602 }
603
/* An exclamation mark or a circumflex negates the class. */
604if (*pattern == CHAR_EXCLAMATION_MARK
605 || *pattern == CHAR_CIRCUMFLEX_ACCENT)
606 {
607 pattern++;
608
 /* NOTE(review): source line 612 is absent; presumably it returns an error
 code because the pattern ended inside the brackets. */
609 if (pattern >= pattern_end)
610 {
611 *from = pattern;
613 }
614
615 is_negative = TRUE;
616
 /* Stage the opening bracket and circumflex and, unless no_wildsep is set,
 the separator (escaped if needed) so that the negated class can never
 match the separator. */
617 out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
618 out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
619 len = 2;
620
621 if (!no_wildsep)
622 {
623 if (with_escape)
624 {
625 out->out_str[len] = CHAR_BACKSLASH;
626 len++;
627 }
628 out->out_str[len] = (uint8_t) separator;
629 }
630
 /* NOTE(review): when no_wildsep is TRUE, len is still 2 and out_str[2] has
 not been assigned in this function, yet len + 1 bytes are written. That
 looks like a stale byte reaching the output; confirm and fix upstream. */
631 convert_glob_write_str(out, len + 1);
632 }
633else
634 convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);
635
636has_prev_c = FALSE;
637prev_c = 0;
638
/* A closing bracket in first position is a literal member of the class and
is emitted with a backslash in front. NOTE(review): source line 645 is
absent; presumably it records the bracket in prev_c. */
639if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
640 {
641 out->out_str[0] = CHAR_BACKSLASH;
642 out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
643 convert_glob_write_str(out, 2);
644 has_prev_c = TRUE;
646 pattern++;
647 }
648
649while (pattern < pattern_end)
650 {
651 char_start = pattern;
652 GETCHARINCTEST(c, pattern);
653
 /* NOTE(review): source line 654 is absent; the block below finishes the
 class and returns success, so presumably the missing line tests for the
 closing square bracket. */
655 {
656 convert_glob_write(out, c);
657
 /* A non-negated class that can match the separator is followed by a
 negative lookbehind on the separator, unless no_wildsep is set. */
658 if (!is_negative && !no_wildsep && separator_seen)
659 {
660 out->out_str[0] = CHAR_LEFT_PARENTHESIS;
661 out->out_str[1] = CHAR_QUESTION_MARK;
662 out->out_str[2] = CHAR_LESS_THAN_SIGN;
663 out->out_str[3] = CHAR_EXCLAMATION_MARK;
664 convert_glob_write_str(out, 4);
665
666 convert_glob_print_separator(out, separator, with_escape);
667 convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
668 }
669
670 *from = pattern;
671 return 0;
672 }
673
674 if (pattern >= pattern_end) break;
675
 /* An opening bracket followed by a colon may start a POSIX class. When
 the class is malformed, control falls out of this if and the bracket is
 written as an escaped literal at the bottom of the loop. */
676 if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
677 {
678 *from = pattern;
679 class_index = convert_glob_parse_class(from, pattern_end, out);
680
681 if (class_index != 0)
682 {
683 pattern = *from;
684
685 has_prev_c = FALSE;
686 prev_c = 0;
687
688 if (!is_negative &&
689 convert_glob_char_in_class (class_index, separator))
690 separator_seen = TRUE;
691 continue;
692 }
693 }
 /* A minus after a plain character, and not directly before the closing
 bracket, forms a range with the character that follows it. */
694 else if (c == CHAR_MINUS && has_prev_c &&
695 *pattern != CHAR_RIGHT_SQUARE_BRACKET)
696 {
697 convert_glob_write(out, CHAR_MINUS);
698
699 char_start = pattern;
700 GETCHARINCTEST(c, pattern);
701
702 if (pattern >= pattern_end) break;
703
704 if (escape != 0 && c == escape)
705 {
706 char_start = pattern;
707 GETCHARINCTEST(c, pattern);
708 }
 /* NOTE(review): source line 712 is absent; presumably it returns an
 error code because a class cannot be the end point of a range. */
709 else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
710 {
711 *from = pattern;
713 }
714
 /* NOTE(review): source line 718 is absent; presumably it returns an
 error code for a range whose end points are in descending order. */
715 if (prev_c > c)
716 {
717 *from = pattern;
719 }
720
 /* The separator lies strictly inside the range. */
721 if (prev_c < separator && separator < c) separator_seen = TRUE;
722
723 has_prev_c = FALSE;
724 prev_c = 0;
725 }
726 else
727 {
 /* An escape character makes the following character the class member. */
728 if (escape != 0 && c == escape)
729 {
730 char_start = pattern;
731 GETCHARINCTEST(c, pattern);
732
733 if (pattern >= pattern_end) break;
734 }
735
736 has_prev_c = TRUE;
737 prev_c = c;
738 }
739
 /* NOTE(review): source line 740 is absent; it holds the start of the
 condition whose tail is shown below. The visible part puts a backslash in
 front of a backslash or a minus. */
741 c == CHAR_BACKSLASH || c == CHAR_MINUS)
742 convert_glob_write(out, CHAR_BACKSLASH);
743
744 if (c == separator) separator_seen = TRUE;
745
 /* Copy all the code units of the character. */
746 do convert_glob_write(out, *char_start++); while (char_start < pattern);
747 }
748
/* The pattern ended before a closing bracket was found. NOTE(review): source
line 750 is absent; presumably it returns an error code. */
749*from = pattern;
751}
752
753
754/* Prints a (*COMMIT) into the output.
755
756Arguments:
757 out output context
758*/
759
760static void
761convert_glob_print_commit(pcre2_output_context *out)
762{
763out->out_str[0] = CHAR_LEFT_PARENTHESIS;
764out->out_str[1] = CHAR_ASTERISK;
765out->out_str[2] = CHAR_C;
766out->out_str[3] = CHAR_O;
767out->out_str[4] = CHAR_M;
768out->out_str[5] = CHAR_M;
769out->out_str[6] = CHAR_I;
770out->out_str[7] = CHAR_T;
771convert_glob_write_str(out, 8);
772convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
773}
774
775
776/* Bash glob converter.

Every piece of output goes through the convert_glob_write functions, which
count all code units but store only those that fit, so the required length is
known even when the buffer is too small.

NOTE(review): this listing omits source lines 798, 803, 804, 819, 991, 1003
and 1030. The places where that hides logic are marked in the body.
777
778Arguments:
779 options the option bits (the caller clears the glob type bit)
780 pattern the pattern
781 plength length in code units
782 utf TRUE if UTF
783 use_buffer where to put the output
784 use_length length of use_buffer
785 bufflenptr where to put the used length, or the error offset
786 dummyrun TRUE if a dummy run
787 ccontext the convert context
788
789Returns: 0 => success
790 !0 => error code
791*/
792
793static int
794convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
795 BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
796 PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
797{
/* NOTE(review): source lines 798, 803 and 804 are absent from this listing;
presumably they declare out, c and no_wildsep, all of which are used below. */
799PCRE2_SPTR pattern_start = pattern;
800PCRE2_SPTR pattern_end = pattern + plength;
801PCRE2_UCHAR separator = ccontext->glob_separator;
802PCRE2_UCHAR escape = ccontext->glob_escape;
805BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
806BOOL in_atomic = FALSE; /* An atomic group is open in the output */
807BOOL after_starstar = FALSE; /* A double asterisk has been converted */
808BOOL no_slash_z = FALSE; /* Suppress the end anchor */
809BOOL with_escape, is_start, after_separator;
810int result = 0;
811
812(void)utf; /* Avoid compiler warning. */
813
814#ifdef SUPPORT_UNICODE
815if (utf && (separator >= 128 || escape >= 128))
816 {
817 /* Currently only ASCII characters are supported. */
818 *bufflenptr = 0;
 /* NOTE(review): source line 819 is absent; presumably it returns an error
 code. */
820 }
821#endif
822
/* The separator needs a backslash in the output when it is one of the
characters listed in pcre2_escaped_literals. */
823with_escape = strchr(pcre2_escaped_literals, separator) != NULL;
824
825/* Set up the output context. */
826out.output = use_buffer;
827out.output_end = use_buffer + use_length;
828out.output_size = 0;
829
/* The output always starts with the option setting (?s). */
830out.out_str[0] = CHAR_LEFT_PARENTHESIS;
831out.out_str[1] = CHAR_QUESTION_MARK;
832out.out_str[2] = CHAR_s;
833out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
834convert_glob_write_str(&out, 4);
835
/* A start anchor is emitted unless the pattern begins with an asterisk under
no_wildsep, or with a double asterisk while that feature is enabled. */
836is_start = TRUE;
837
838if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)
839 {
840 if (no_wildsep)
841 is_start = FALSE;
842 else if (!no_starstar && pattern + 1 < pattern_end &&
843 pattern[1] == CHAR_ASTERISK)
844 is_start = FALSE;
845 }
846
847if (is_start)
848 {
849 out.out_str[0] = CHAR_BACKSLASH;
850 out.out_str[1] = CHAR_A;
851 convert_glob_write_str(&out, 2);
852 }
853
854while (pattern < pattern_end)
855 {
856 c = *pattern++;
857
858 if (c == CHAR_ASTERISK)
859 {
 /* TRUE when this asterisk is the first character of the pattern. */
860 is_start = pattern == pattern_start + 1;
861
 /* Close the atomic group opened for an earlier asterisk. */
862 if (in_atomic)
863 {
864 convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
865 in_atomic = FALSE;
866 }
867
 /* Two or more asterisks, when the double asterisk feature is enabled. */
868 if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
869 {
870 after_separator = is_start || (pattern[-2] == separator);
871
872 do pattern++; while (pattern < pattern_end &&
873 *pattern == CHAR_ASTERISK);
874
 /* A trailing double asterisk: nothing is emitted for it and the end
 anchor is suppressed. */
875 if (pattern >= pattern_end)
876 {
877 no_slash_z = TRUE;
878 break;
879 }
880
881 after_starstar = TRUE;
882
 /* Step over an escape character that precedes the separator. */
883 if (after_separator && escape != 0 && *pattern == escape &&
884 pattern + 1 < pattern_end && pattern[1] == separator)
885 pattern++;
886
 /* At the start of the pattern, a following separator becomes a group
 that matches either the start of the subject or the separator. */
887 if (is_start)
888 {
889 if (*pattern != separator) continue;
890
891 out.out_str[0] = CHAR_LEFT_PARENTHESIS;
892 out.out_str[1] = CHAR_QUESTION_MARK;
893 out.out_str[2] = CHAR_COLON;
894 out.out_str[3] = CHAR_BACKSLASH;
895 out.out_str[4] = CHAR_A;
896 out.out_str[5] = CHAR_VERTICAL_LINE;
897 convert_glob_write_str(&out, 6);
898
899 convert_glob_print_separator(&out, separator, with_escape);
900 convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
901
902 pattern++;
903 continue;
904 }
905
906 convert_glob_print_commit(&out);
907
 /* Not delimited by separators on both sides: a lazy match of any
 characters. */
908 if (!after_separator || *pattern != separator)
909 {
910 out.out_str[0] = CHAR_DOT;
911 out.out_str[1] = CHAR_ASTERISK;
912 out.out_str[2] = CHAR_QUESTION_MARK;
913 convert_glob_write_str(&out, 3);
914 continue;
915 }
916
 /* Between two separators: an optional lazy group of any characters
 followed by the separator. */
917 out.out_str[0] = CHAR_LEFT_PARENTHESIS;
918 out.out_str[1] = CHAR_QUESTION_MARK;
919 out.out_str[2] = CHAR_COLON;
920 out.out_str[3] = CHAR_DOT;
921 out.out_str[4] = CHAR_ASTERISK;
922 out.out_str[5] = CHAR_QUESTION_MARK;
923
924 convert_glob_write_str(&out, 6);
925
926 convert_glob_print_separator(&out, separator, with_escape);
927
928 out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
929 out.out_str[1] = CHAR_QUESTION_MARK;
930 out.out_str[2] = CHAR_QUESTION_MARK;
931 convert_glob_write_str(&out, 3);
932
933 pattern++;
934 continue;
935 }
936
 /* A run of asterisks is treated as a single one here. */
937 if (pattern < pattern_end && *pattern == CHAR_ASTERISK)
938 {
939 do pattern++; while (pattern < pattern_end &&
940 *pattern == CHAR_ASTERISK);
941 }
942
943 if (no_wildsep)
944 {
945 if (pattern >= pattern_end)
946 {
947 no_slash_z = TRUE;
948 break;
949 }
950
951 /* Start check must be after the end check. */
952 if (is_start) continue;
953 }
954
 /* Away from the start, the wildcard is preceded either by the opening
 of an atomic group (after a double asterisk) or by (*COMMIT). */
955 if (!is_start)
956 {
957 if (after_starstar)
958 {
959 out.out_str[0] = CHAR_LEFT_PARENTHESIS;
960 out.out_str[1] = CHAR_QUESTION_MARK;
961 out.out_str[2] = CHAR_GREATER_THAN_SIGN;
962 convert_glob_write_str(&out, 3);
963 in_atomic = TRUE;
964 }
965 else
966 convert_glob_print_commit(&out);
967 }
968
969 if (no_wildsep)
970 convert_glob_write(&out, CHAR_DOT);
971 else
972 convert_glob_print_wildcard(&out, separator, with_escape);
973
 /* The repeat is lazy, except for an asterisk at the very end of the
 pattern, which is made possessive. */
974 out.out_str[0] = CHAR_ASTERISK;
975 out.out_str[1] = CHAR_QUESTION_MARK;
976 if (pattern >= pattern_end)
977 out.out_str[1] = CHAR_PLUS;
978 convert_glob_write_str(&out, 2);
979 continue;
980 }
981
 /* A question mark becomes a dot, or the wildcard class that excludes the
 separator. */
982 if (c == CHAR_QUESTION_MARK)
983 {
984 if (no_wildsep)
985 convert_glob_write(&out, CHAR_DOT);
986 else
987 convert_glob_print_wildcard(&out, separator, with_escape);
988 continue;
989 }
990
 /* NOTE(review): source line 991 is absent; the block below parses a
 bracket expression, so presumably the missing line tests for an opening
 square bracket. */
992 {
993 result = convert_glob_parse_range(&pattern, pattern_end,
994 &out, utf, separator, with_escape, escape, no_wildsep);
995 if (result != 0) break;
996 continue;
997 }
998
 /* The escape character makes the next character literal. NOTE(review):
 source line 1003 is absent; presumably it sets result to an error code
 for an escape at the very end of the pattern. */
999 if (escape != 0 && c == escape)
1000 {
1001 if (pattern >= pattern_end)
1002 {
1004 break;
1005 }
1006 c = *pattern++;
1007 }
1008
 /* Literal character, with a backslash first when the regex syntax would
 otherwise treat it as special. */
1009 if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
1010 convert_glob_write(&out, CHAR_BACKSLASH);
1011
1012 convert_glob_write(&out, c);
1013 }
1014
/* On success, finish the output: end anchor (unless suppressed), the closing
parenthesis of a pending atomic group, and the terminating zero. */
1015if (result == 0)
1016 {
1017 if (!no_slash_z)
1018 {
1019 out.out_str[0] = CHAR_BACKSLASH;
1020 out.out_str[1] = CHAR_z;
1021 convert_glob_write_str(&out, 2);
1022 }
1023
1024 if (in_atomic)
1025 convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
1026
1027 convert_glob_write(&out, CHAR_NUL);
1028
 /* In a real run, a count that differs from the number of units stored
 means some output did not fit. NOTE(review): source line 1030 is absent;
 presumably it sets result to an error code. */
1029 if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
1031 }
1032
/* On error, report how far into the pattern the scan had got. */
1033if (result != 0)
1034 {
1035 *bufflenptr = pattern - pattern_start;
1036 return result;
1037 }
1038
/* The reported length does not include the terminating zero. */
1039*bufflenptr = out.output_size - 1;
1040return 0;
1041}
1042
1043
1044/*************************************************
1045* Convert pattern *
1046*************************************************/
1047
1048/* This is the external-facing function for converting other forms of pattern
1049into PCRE2 regular expression patterns. On error, the bufflenptr argument is
1050used to return an offset in the original pattern.
1051
1052Arguments:
1053 pattern the input pattern
1054 plength length of input, or PCRE2_ZERO_TERMINATED
1055 options options bits
1056 buffptr pointer to pointer to output buffer
1057 bufflenptr pointer to length of output buffer
1058 ccontext convert context or NULL
1059
1060Returns: 0 for success, else an error code (+ve or -ve)
1061*/
1062
1065 PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
1066 pcre2_convert_context *ccontext)
1067{
/* NOTE(review): source lines 1063 and 1064, which hold the start of this
function's signature, are absent from this listing, as are lines 1095, 1134,
1135 and 1164.

Three calling modes can be read from the code below:
  buffptr is NULL         only the required length is returned in *bufflenptr
  *buffptr is NULL        a buffer is allocated and returned in *buffptr
  *buffptr is not NULL    the caller's buffer is used and *bufflenptr gives
                          its length on entry */
1068int i, rc;
1069PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
1070PCRE2_UCHAR *use_buffer = dummy_buffer;
1071PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
1072BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
1073uint32_t pattype = options & TYPE_OPTIONS;
1074
1075if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
1076
/* The middle test isolates the lowest set bit of pattype; it differs from
pattype itself when more than one bit is set. */
1077if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */
1078 (pattype & (~pattype+1)) != pattype || /* More than one type set */
1079 pattype == 0) /* No type set */
1080 {
1081 *bufflenptr = 0; /* Error offset */
1082 return PCRE2_ERROR_BADOPTION;
1083 }
1084
1085if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
1086if (ccontext == NULL) ccontext =
1087 (pcre2_convert_context *)(&PRIV(default_convert_context));
1088
1089/* Check UTF if required. */
1090
1091#ifndef SUPPORT_UNICODE
1092if (utf)
1093 {
1094 *bufflenptr = 0; /* Error offset */
 /* NOTE(review): source line 1095 is absent; presumably it returns an error
 code because UTF was requested without Unicode support. */
1096 }
1097#else
1098if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
1099 {
1100 PCRE2_SIZE erroroffset;
1101 rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
1102 if (rc != 0)
1103 {
1104 *bufflenptr = erroroffset;
1105 return rc;
1106 }
1107 }
1108#endif
1109
1110/* If buffptr is not NULL, and what it points to is not NULL, we are being
1111provided with a buffer and a length, so set them as the buffer to use. */
1112
1113if (buffptr != NULL && *buffptr != NULL)
1114 {
1115 use_buffer = *buffptr;
1116 use_length = *bufflenptr;
1117 }
1118
1119/* Call an individual converter, either just once (if a buffer was provided or
1120just the length is needed), or twice (if a memory allocation is required). */
1121
1122for (i = 0; i < 2; i++)
1123 {
1124 PCRE2_UCHAR *allocated;
 /* Recomputed on every pass: after an allocation *buffptr is no longer
 NULL, so the second pass is a real run. */
1125 BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
1126
1127 switch(pattype)
1128 {
1129 case PCRE2_CONVERT_GLOB:
1130 rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,
1131 use_buffer, use_length, bufflenptr, dummyrun, ccontext);
1132 break;
1133
 /* NOTE(review): source lines 1134 and 1135 are absent; presumably they
 are the case labels for the two POSIX pattern types. */
1136 rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
1137 bufflenptr, dummyrun, ccontext);
1138 break;
1139
1140 default:
1141 *bufflenptr = 0; /* Error offset */
1142 return PCRE2_ERROR_INTERNAL;
1143 }
1144
1145 if (rc != 0 || /* Error */
1146 buffptr == NULL || /* Just the length is required */
1147 *buffptr != NULL) /* Buffer was provided or allocated */
1148 return rc;
1149
1150 /* Allocate memory for the buffer, with hidden space for an allocator at
1151 the start. The next time round the loop runs the conversion for real. */
1152
 /* NOTE(review): the size is scaled by PCRE2_CODE_UNIT_WIDTH. If that macro
 is the width in bits rather than bytes, this requests more memory than
 the code units need; harmless, but worth confirming. */
1153 allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
1154 (*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
1155 if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;
 /* The caller receives a pointer just past the hidden memctl block. */
1156 *buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));
1157
1158 use_buffer = *buffptr;
1159 use_length = *bufflenptr + 1;
1160 }
1161
1162/* Control should never get here. */
1163
/* NOTE(review): source line 1164 is absent; presumably it returns an error
code. */
1165}
1166
1167
1168/*************************************************
1169* Free converted pattern *
1170*************************************************/
1171
1172/* This frees a converted pattern that was put in newly-allocated memory.
1173
1174Argument: the converted pattern
1175Returns: nothing
1176*/
1177
1180{
/* NOTE(review): source lines 1178 and 1179, which hold this function's
signature, are absent from this listing. */

/* A NULL argument is accepted and ignored. */
1181if (converted != NULL)
1182 {
 /* The pattern was handed out as a pointer just past a hidden pcre2_memctl
 block (see the allocation in the convert function), so step back by the
 size of that block to find the start of the allocation. */
1183 pcre2_memctl *memctl =
1184 (pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));
 /* Release the allocation through the free function stored in the block. */
1185 memctl->free(memctl, memctl->memory_data);
1186 }
1187}
1188
1189/* End of pcre2_convert.c */
size_t len
Definition apprentice.c:174
chr(int $codepoint)
strchr(string $haystack, string $needle, bool $before_needle=false)
char s[4]
Definition cdf.c:77
memcpy(ptr1, ptr2, size)
buf start
Definition ffi.c:4687
#define TRUE
Definition gd_gd.c:7
#define FALSE
Definition gd_gd.c:8
#define NULL
Definition gdcache.h:45
#define pcre2_convert_context
Definition pcre2.h:842
#define PCRE2_ZERO_TERMINATED
Definition pcre2.h:481
#define PCRE2_ERROR_BADOPTION
Definition pcre2.h:375
#define PCRE2_ERROR_INTERNAL
Definition pcre2.h:385
#define PCRE2_UCHAR
Definition pcre2.h:819
#define PCRE2_CONVERT_POSIX_BASIC
Definition pcre2.h:200
#define pcre2_converted_pattern_free
Definition pcre2.h:861
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED
Definition pcre2.h:254
#define PCRE2_CONVERT_POSIX_EXTENDED
Definition pcre2.h:201
#define PCRE2_SIZE
Definition pcre2.h:479
#define PCRE2_SPTR
Definition pcre2.h:820
#define PCRE2_ERROR_NULL
Definition pcre2.h:392
#define PCRE2_CONVERT_GLOB_NO_STARSTAR
Definition pcre2.h:204
#define PCRE2_CONVERT_NO_UTF_CHECK
Definition pcre2.h:199
#define PCRE2_ERROR_CONVERT_SYNTAX
Definition pcre2.h:406
#define PCRE2_ERROR_END_BACKSLASH
Definition pcre2.h:223
#define PCRE2_CALL_CONVENTION
Definition pcre2.h:81
#define PCRE2_CONVERT_UTF
Definition pcre2.h:198
#define PCRE2_CONVERT_GLOB
Definition pcre2.h:202
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
Definition pcre2.h:203
#define PCRE2_ERROR_NOMEMORY
Definition pcre2.h:389
#define pcre2_pattern_convert
Definition pcre2.h:888
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET
Definition pcre2.h:228
void *PRIV memctl_malloc(size_t size, pcre2_memctl *memctl)
#define PUTCHARS(string)
@ POSIX_NOT_BRACKET
@ POSIX_CLASS_NOT_STARTED
@ POSIX_ANCHORED
@ POSIX_START_REGEX
@ POSIX_CLASS_STARTED
@ POSIX_CLASS_STARTING
#define TYPE_OPTIONS
#define ALL_OPTIONS
#define STR_STAR_NUL
#define STR_COLON_RIGHT_SQUARE_BRACKET
#define DUMMY_BUFFER_SIZE
#define STR_ASTERISK
#define CHAR_DOT
int BOOL
#define STR_7
#define CHAR_LESS_THAN_SIGN
#define CHAR_s
#define STR_2
#define CHAR_DOLLAR_SIGN
#define STR_RIGHT_PARENTHESIS
#define STR_BACKSLASH
#define CHAR_z
#define CHAR_M
#define CHAR_I
#define CHAR_C
#define STR_8
#define CHAR_VERTICAL_LINE
#define STR_LEFT_CURLY_BRACKET
#define CHAR_BACKSLASH
#define CHAR_NUL
#define STR_1
#define CHAR_PLUS
#define CHAR_LEFT_SQUARE_BRACKET
#define CHAR_QUESTION_MARK
#define STR_QUESTION_MARK
#define CHAR_COLON
#define STR_LEFT_SQUARE_BRACKET
#define CHAR_EXCLAMATION_MARK
#define CHAR_O
#define STR_6
#define CHAR_MINUS
#define STR_DOT
#define STR_PLUS
#define STR_RIGHT_CURLY_BRACKET
#define CHAR_RIGHT_PARENTHESIS
#define CHAR_A
#define STR_RIGHT_SQUARE_BRACKET
#define CHAR_RIGHT_SQUARE_BRACKET
#define CHAR_T
#define CHAR_RIGHT_CURLY_BRACKET
#define CHAR_LEFT_PARENTHESIS
#define CHAR_CIRCUMFLEX_ACCENT
#define STR_CIRCUMFLEX_ACCENT
#define PCRE2_EXP_DEFN
#define CHAR_LEFT_CURLY_BRACKET
#define STR_LEFT_PARENTHESIS
#define STR_DOLLAR_SIGN
#define CHAR_HT
#define CHAR_UNDERSCORE
#define STR_4
#define CHAR_GREATER_THAN_SIGN
#define CHAR_ASTERISK
#define STR_VERTICAL_LINE
#define PRIV(name)
#define CHAR_SPACE
#define STR_9
#define STR_5
#define STR_3
#define CU2BYTES(x)
#define GETCHARINCTEST(c, eptr)
int PRIV valid_utf(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
PHP_JSON_API size_t int options
Definition php_json.h:102
p
Definition session.c:1105
void(* free)(void *, void *)
strlen(string $string)
ZEND_API void(ZEND_FASTCALL *zend_touch_vm_stack_data)(void *vm_stack_data)
bool result
out($f, $s)