php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
php_mbregex.c
Go to the documentation of this file.
1/*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
14 +----------------------------------------------------------------------+
15 */
16
17#include "libmbfl/config.h"
18
19#include "php.h"
20#include "php_ini.h"
21
22#ifdef HAVE_MBREGEX
23
24#include "zend_smart_str.h"
25#include "ext/standard/info.h"
26#include "php_mbregex.h"
27#include "mbstring.h"
29
30#include "php_onig_compat.h" /* must come prior to the oniguruma header */
31#include <oniguruma.h>
32#undef UChar
33
/*
 * Fallbacks used when ONIGURUMA_VERSION_INT is undefined or below 60800:
 * OnigMatchParam becomes plain void, the match-param helpers collapse to
 * no-ops (onig_new_match_param() yields NULL), and the *_with_param entry
 * points forward to onig_search()/onig_match(), dropping the trailing
 * match-param argument.
 */
34#if !defined(ONIGURUMA_VERSION_INT) || ONIGURUMA_VERSION_INT < 60800
35typedef void OnigMatchParam;
36#define onig_new_match_param() (NULL)
37#define onig_initialize_match_param(x) (void)(x)
38#define onig_set_match_stack_limit_size_of_match_param(x, y)
39#define onig_set_retry_limit_in_match_of_match_param(x, y)
40#define onig_free_match_param(x)
41#define onig_search_with_param(reg, str, end, start, range, region, option, mp) \
42 onig_search(reg, str, end, start, range, region, option)
43#define onig_match_with_param(re, str, end, at, region, option, mp) \
44 onig_match(re, str, end, at, region, option)
45#endif
46
48
/* "major.minor.teeny" version text; written by PHP_MINIT_FUNCTION(mb_regex). */
49char php_mb_oniguruma_version[256];
50
51struct _zend_mb_regex_globals {
52 OnigEncoding default_mbctype;
53 OnigEncoding current_mbctype;
54 const mbfl_encoding *current_mbctype_mbfl_encoding;
55 HashTable ht_rc;
56 zval search_str;
57 zval *search_str_val;
58 size_t search_pos;
59 php_mb_regex_t *search_re;
60 OnigRegion *search_regs;
61 OnigOptionType regex_default_options;
62 OnigSyntaxType *regex_default_syntax;
63};
64
/* Shorthand for field g of the mbregex globals hanging off MBSTRG(mb_regex_globals). */
65#define MBREX(g) (MBSTRG(mb_regex_globals)->g)
66
67/* {{{ static void php_mb_regex_free_cache() */
68static void php_mb_regex_free_cache(zval *el) {
69 onig_free((php_mb_regex_t *)Z_PTR_P(el));
70}
71/* }}} */
72
73/* {{{ _php_mb_regex_globals_ctor */
74static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals)
75{
76 pglobals->default_mbctype = ONIG_ENCODING_UTF8;
77 pglobals->current_mbctype = ONIG_ENCODING_UTF8;
78 pglobals->current_mbctype_mbfl_encoding = &mbfl_encoding_utf8;
79 ZVAL_UNDEF(&pglobals->search_str);
80 pglobals->search_re = (php_mb_regex_t*)NULL;
81 pglobals->search_pos = 0;
82 pglobals->search_regs = (OnigRegion*)NULL;
83 pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
84 pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
85 return SUCCESS;
86}
87/* }}} */
88
89/* {{{ php_mb_regex_globals_alloc */
90zend_mb_regex_globals *php_mb_regex_globals_alloc(void)
91{
92 zend_mb_regex_globals *pglobals = pemalloc(
93 sizeof(zend_mb_regex_globals), 1);
94 if (SUCCESS != _php_mb_regex_globals_ctor(pglobals)) {
95 pefree(pglobals, 1);
96 return NULL;
97 }
98 return pglobals;
99}
100/* }}} */
101
102/* {{{ php_mb_regex_globals_free */
103void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals)
104{
105 if (!pglobals) {
106 return;
107 }
108 pefree(pglobals, 1);
109}
110/* }}} */
111
112/* {{{ PHP_MINIT_FUNCTION(mb_regex) */
113PHP_MINIT_FUNCTION(mb_regex)
114{
115 onig_init();
116
117 snprintf(php_mb_oniguruma_version, sizeof(php_mb_oniguruma_version), "%d.%d.%d",
118 ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY);
119
120 return SUCCESS;
121}
122/* }}} */
123
124/* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
126{
127 onig_end();
128 return SUCCESS;
129}
130/* }}} */
131
132/* {{{ PHP_RINIT_FUNCTION(mb_regex) */
133PHP_RINIT_FUNCTION(mb_regex)
134{
135 if (!MBSTRG(mb_regex_globals)) return FAILURE;
136 zend_hash_init(&MBREX(ht_rc), 0, NULL, php_mb_regex_free_cache, 0);
137 return SUCCESS;
138}
139/* }}} */
140
141/* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
143{
144 MBREX(current_mbctype) = MBREX(default_mbctype);
145 MBREX(current_mbctype_mbfl_encoding) = mbfl_name2encoding(php_mb_regex_get_default_mbctype());
146
147 if (!Z_ISUNDEF(MBREX(search_str))) {
148 zval_ptr_dtor(&MBREX(search_str));
149 ZVAL_UNDEF(&MBREX(search_str));
150 }
151 MBREX(search_pos) = 0;
152 MBREX(search_re) = NULL;
153
154 if (MBREX(search_regs) != NULL) {
155 onig_region_free(MBREX(search_regs), 1);
156 MBREX(search_regs) = (OnigRegion *)NULL;
157 }
158 zend_hash_destroy(&MBREX(ht_rc));
159
160 return SUCCESS;
161}
162/* }}} */
163
164/* {{{ PHP_MINFO_FUNCTION(mb_regex) */
/* Emits the mbregex rows of the module info table: an "enabled" row and the
 * Oniguruma version formatted as major.minor.teeny. */
165PHP_MINFO_FUNCTION(mb_regex)
166{
167 char buf[32];
/* NOTE(review): the embedded numbering skips 168 here; a statement appears to
 * have been lost in extraction (presumably the table-start call) - confirm. */
169 php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
170 snprintf(buf, sizeof(buf), "%d.%d.%d",
171 ONIGURUMA_VERSION_MAJOR,
172 ONIGURUMA_VERSION_MINOR,
173 ONIGURUMA_VERSION_TEENY);
174 php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
/* NOTE(review): the embedded numbering skips 175 here as well (presumably the
 * matching table-end call) - confirm. */
176}
177/* }}} */
178
179/*
180 * encoding name resolver
181 */
182
183/* {{{ encoding name map */
184typedef struct _php_mb_regex_enc_name_map_t {
185 const char *names;
186 OnigEncoding code;
187} php_mb_regex_enc_name_map_t;
188
189static const php_mb_regex_enc_name_map_t enc_name_map[] = {
190#ifdef ONIG_ENCODING_EUC_JP
191 {
192 "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
193 ONIG_ENCODING_EUC_JP
194 },
195#endif
196#ifdef ONIG_ENCODING_UTF8
197 {
198 "UTF-8\0UTF8\0",
199 ONIG_ENCODING_UTF8
200 },
201#endif
202#ifdef ONIG_ENCODING_UTF16_BE
203 {
204 "UTF-16\0UTF-16BE\0",
205 ONIG_ENCODING_UTF16_BE
206 },
207#endif
208#ifdef ONIG_ENCODING_UTF16_LE
209 {
210 "UTF-16LE\0",
211 ONIG_ENCODING_UTF16_LE
212 },
213#endif
214#ifdef ONIG_ENCODING_UTF32_BE
215 {
216 "UCS-4\0UTF-32\0UTF-32BE\0",
217 ONIG_ENCODING_UTF32_BE
218 },
219#endif
220#ifdef ONIG_ENCODING_UTF32_LE
221 {
222 "UCS-4LE\0UTF-32LE\0",
223 ONIG_ENCODING_UTF32_LE
224 },
225#endif
226#ifdef ONIG_ENCODING_SJIS
227 {
228 "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
229 ONIG_ENCODING_SJIS
230 },
231#endif
232#ifdef ONIG_ENCODING_BIG5
233 {
234 "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
235 ONIG_ENCODING_BIG5
236 },
237#endif
238#ifdef ONIG_ENCODING_EUC_CN
239 {
240 "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
241 ONIG_ENCODING_EUC_CN
242 },
243#endif
244#ifdef ONIG_ENCODING_EUC_TW
245 {
246 "EUC-TW\0EUCTW\0EUC_TW\0",
247 ONIG_ENCODING_EUC_TW
248 },
249#endif
250#ifdef ONIG_ENCODING_EUC_KR
251 {
252 "EUC-KR\0EUCKR\0EUC_KR\0",
253 ONIG_ENCODING_EUC_KR
254 },
255#endif
256#if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
257 {
258 "KOI8\0KOI-8\0",
259 ONIG_ENCODING_KOI8
260 },
261#endif
262#ifdef ONIG_ENCODING_KOI8_R
263 {
264 "KOI8R\0KOI8-R\0KOI-8R\0",
265 ONIG_ENCODING_KOI8_R
266 },
267#endif
268#ifdef ONIG_ENCODING_ISO_8859_1
269 {
270 "ISO-8859-1\0ISO8859-1\0",
271 ONIG_ENCODING_ISO_8859_1
272 },
273#endif
274#ifdef ONIG_ENCODING_ISO_8859_2
275 {
276 "ISO-8859-2\0ISO8859-2\0",
277 ONIG_ENCODING_ISO_8859_2
278 },
279#endif
280#ifdef ONIG_ENCODING_ISO_8859_3
281 {
282 "ISO-8859-3\0ISO8859-3\0",
283 ONIG_ENCODING_ISO_8859_3
284 },
285#endif
286#ifdef ONIG_ENCODING_ISO_8859_4
287 {
288 "ISO-8859-4\0ISO8859-4\0",
289 ONIG_ENCODING_ISO_8859_4
290 },
291#endif
292#ifdef ONIG_ENCODING_ISO_8859_5
293 {
294 "ISO-8859-5\0ISO8859-5\0",
295 ONIG_ENCODING_ISO_8859_5
296 },
297#endif
298#ifdef ONIG_ENCODING_ISO_8859_6
299 {
300 "ISO-8859-6\0ISO8859-6\0",
301 ONIG_ENCODING_ISO_8859_6
302 },
303#endif
304#ifdef ONIG_ENCODING_ISO_8859_7
305 {
306 "ISO-8859-7\0ISO8859-7\0",
307 ONIG_ENCODING_ISO_8859_7
308 },
309#endif
310#ifdef ONIG_ENCODING_ISO_8859_8
311 {
312 "ISO-8859-8\0ISO8859-8\0",
313 ONIG_ENCODING_ISO_8859_8
314 },
315#endif
316#ifdef ONIG_ENCODING_ISO_8859_9
317 {
318 "ISO-8859-9\0ISO8859-9\0",
319 ONIG_ENCODING_ISO_8859_9
320 },
321#endif
322#ifdef ONIG_ENCODING_ISO_8859_10
323 {
324 "ISO-8859-10\0ISO8859-10\0",
325 ONIG_ENCODING_ISO_8859_10
326 },
327#endif
328#ifdef ONIG_ENCODING_ISO_8859_11
329 {
330 "ISO-8859-11\0ISO8859-11\0",
331 ONIG_ENCODING_ISO_8859_11
332 },
333#endif
334#ifdef ONIG_ENCODING_ISO_8859_13
335 {
336 "ISO-8859-13\0ISO8859-13\0",
337 ONIG_ENCODING_ISO_8859_13
338 },
339#endif
340#ifdef ONIG_ENCODING_ISO_8859_14
341 {
342 "ISO-8859-14\0ISO8859-14\0",
343 ONIG_ENCODING_ISO_8859_14
344 },
345#endif
346#ifdef ONIG_ENCODING_ISO_8859_15
347 {
348 "ISO-8859-15\0ISO8859-15\0",
349 ONIG_ENCODING_ISO_8859_15
350 },
351#endif
352#ifdef ONIG_ENCODING_ISO_8859_16
353 {
354 "ISO-8859-16\0ISO8859-16\0",
355 ONIG_ENCODING_ISO_8859_16
356 },
357#endif
358#ifdef ONIG_ENCODING_ASCII
359 {
360 "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
361 ONIG_ENCODING_ASCII
362 },
363#endif
364 { NULL, ONIG_ENCODING_UNDEF }
365};
366/* }}} */
367
368/* {{{ php_mb_regex_name2mbctype */
369static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
370{
371 const char *p;
372 const php_mb_regex_enc_name_map_t *mapping;
373
374 if (pname == NULL || !*pname) {
375 return ONIG_ENCODING_UNDEF;
376 }
377
378 for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
379 for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
380 if (strcasecmp(p, pname) == 0) {
381 return mapping->code;
382 }
383 }
384 }
385
386 return ONIG_ENCODING_UNDEF;
387}
388/* }}} */
389
390/* {{{ php_mb_regex_mbctype2name */
391static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
392{
393 const php_mb_regex_enc_name_map_t *mapping;
394
395 for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
396 if (mapping->code == mbctype) {
397 return mapping->names;
398 }
399 }
400
401 return NULL;
402}
403/* }}} */
404
405/* {{{ php_mb_regex_set_mbctype */
406int php_mb_regex_set_mbctype(const char *encname)
407{
408 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
409 if (mbctype == ONIG_ENCODING_UNDEF) {
410 return FAILURE;
411 }
412 MBREX(current_mbctype) = mbctype;
413 MBREX(current_mbctype_mbfl_encoding) = mbfl_name2encoding(encname);
414 return SUCCESS;
415}
416/* }}} */
417
418/* {{{ php_mb_regex_set_default_mbctype */
419int php_mb_regex_set_default_mbctype(const char *encname)
420{
421 OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
422 if (mbctype == ONIG_ENCODING_UNDEF) {
423 return FAILURE;
424 }
425 MBREX(default_mbctype) = mbctype;
426 return SUCCESS;
427}
428/* }}} */
429
430/* {{{ php_mb_regex_get_mbctype */
431const char *php_mb_regex_get_mbctype(void)
432{
433 return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
434}
435/* }}} */
436
437/* {{{ php_mb_regex_get_mbctype_encoding */
438const mbfl_encoding *php_mb_regex_get_mbctype_encoding(void)
439{
440 return MBREX(current_mbctype_mbfl_encoding);
441}
442/* }}} */
443
444/* {{{ php_mb_regex_get_default_mbctype */
445const char *php_mb_regex_get_default_mbctype(void)
446{
447 return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
448}
449/* }}} */
450
451/*
452 * regex cache
453 */
454/* {{{ php_mbregex_compile_pattern */
/*
 * Returns a compiled regex for pattern, using the per-request cache.
 *
 * The cache is keyed by the pattern bytes only. A cached entry is reused only
 * if its options, encoding and syntax all match the request; otherwise the
 * pattern is compiled again and the cache entry is replaced.
 *
 * Returns NULL if the pattern is not valid in the current encoding or if
 * Oniguruma rejects it (an E_WARNING is raised for the compile error).
 */
455static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, size_t patlen, OnigOptionType options, OnigSyntaxType *syntax)
456{
457 int err_code = 0;
458 php_mb_regex_t *retval = NULL, *rc = NULL;
459 OnigErrorInfo err_info;
460 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
461 OnigEncoding enc = MBREX(current_mbctype);
462
463 if (!php_mb_check_encoding(pattern, patlen, php_mb_regex_get_mbctype_encoding())) {
/* NOTE(review): the embedded numbering skips 464; the call that the argument
 * list below belongs to was lost in extraction (presumably the start of an
 * error/warning call) - restore before compiling. */
465 "Pattern is not valid under %s encoding", _php_mb_regex_mbctype2name(enc));
466 return NULL;
467 }
468
469 rc = zend_hash_str_find_ptr(&MBREX(ht_rc), (char *)pattern, patlen);
470 if (!rc || onig_get_options(rc) != options || onig_get_encoding(rc) != enc || onig_get_syntax(rc) != syntax) {
471 if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
472 onig_error_code_to_str(err_str, err_code, &err_info);
473 php_error_docref(NULL, E_WARNING, "mbregex compile err: %s", err_str);
474 return NULL;
475 }
/* The update below replaces the cached entry; presumably the table's
 * destructor then frees the old regex, so a search_re that still points at
 * it must be cleared first. */
476 if (rc == MBREX(search_re)) {
477 /* reuse the new rc? see bug #72399 */
478 MBREX(search_re) = NULL;
479 }
480 zend_hash_str_update_ptr(&MBREX(ht_rc), (char *)pattern, patlen, retval);
481 } else {
482 retval = rc;
483 }
484 return retval;
485}
486/* }}} */
487
488/* {{{ _php_mb_regex_get_option_string */
489static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
490{
491 size_t len_left = len;
492 size_t len_req = 0;
493 char *p = str;
494 char c;
495
496 if ((option & ONIG_OPTION_IGNORECASE) != 0) {
497 if (len_left > 0) {
498 --len_left;
499 *(p++) = 'i';
500 }
501 ++len_req;
502 }
503
504 if ((option & ONIG_OPTION_EXTEND) != 0) {
505 if (len_left > 0) {
506 --len_left;
507 *(p++) = 'x';
508 }
509 ++len_req;
510 }
511
512 if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
513 (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
514 if (len_left > 0) {
515 --len_left;
516 *(p++) = 'p';
517 }
518 ++len_req;
519 } else {
520 if ((option & ONIG_OPTION_MULTILINE) != 0) {
521 if (len_left > 0) {
522 --len_left;
523 *(p++) = 'm';
524 }
525 ++len_req;
526 }
527
528 if ((option & ONIG_OPTION_SINGLELINE) != 0) {
529 if (len_left > 0) {
530 --len_left;
531 *(p++) = 's';
532 }
533 ++len_req;
534 }
535 }
536 if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
537 if (len_left > 0) {
538 --len_left;
539 *(p++) = 'l';
540 }
541 ++len_req;
542 }
543 if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
544 if (len_left > 0) {
545 --len_left;
546 *(p++) = 'n';
547 }
548 ++len_req;
549 }
550
551 c = 0;
552
553 if (syntax == ONIG_SYNTAX_JAVA) {
554 c = 'j';
555 } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
556 c = 'u';
557 } else if (syntax == ONIG_SYNTAX_GREP) {
558 c = 'g';
559 } else if (syntax == ONIG_SYNTAX_EMACS) {
560 c = 'c';
561 } else if (syntax == ONIG_SYNTAX_RUBY) {
562 c = 'r';
563 } else if (syntax == ONIG_SYNTAX_PERL) {
564 c = 'z';
565 } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
566 c = 'b';
567 } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
568 c = 'd';
569 }
570
571 if (c != 0) {
572 if (len_left > 0) {
573 --len_left;
574 *(p++) = c;
575 }
576 ++len_req;
577 }
578
579
580 if (len_left > 0) {
581 --len_left;
582 *(p++) = '\0';
583 }
584 ++len_req;
585 if (len < len_req) {
586 return len_req;
587 }
588
589 return 0;
590}
591/* }}} */
592
593/* {{{ _php_mb_regex_init_options */
594static bool _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option,
595 OnigSyntaxType **syntax)
596{
597 size_t n;
598 char c;
599 OnigOptionType optm = 0;
600
601 *syntax = ONIG_SYNTAX_RUBY;
602
603 if (parg != NULL) {
604 n = 0;
605 while(n < narg) {
606 c = parg[n++];
607 switch (c) {
608 case 'i':
609 optm |= ONIG_OPTION_IGNORECASE;
610 break;
611 case 'x':
612 optm |= ONIG_OPTION_EXTEND;
613 break;
614 case 'm':
615 optm |= ONIG_OPTION_MULTILINE;
616 break;
617 case 's':
618 optm |= ONIG_OPTION_SINGLELINE;
619 break;
620 case 'p':
621 optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
622 break;
623 case 'l':
624 optm |= ONIG_OPTION_FIND_LONGEST;
625 break;
626 case 'n':
627 optm |= ONIG_OPTION_FIND_NOT_EMPTY;
628 break;
629 case 'j':
630 *syntax = ONIG_SYNTAX_JAVA;
631 break;
632 case 'u':
633 *syntax = ONIG_SYNTAX_GNU_REGEX;
634 break;
635 case 'g':
636 *syntax = ONIG_SYNTAX_GREP;
637 break;
638 case 'c':
639 *syntax = ONIG_SYNTAX_EMACS;
640 break;
641 case 'r':
642 *syntax = ONIG_SYNTAX_RUBY;
643 break;
644 case 'z':
645 *syntax = ONIG_SYNTAX_PERL;
646 break;
647 case 'b':
648 *syntax = ONIG_SYNTAX_POSIX_BASIC;
649 break;
650 case 'd':
651 *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
652 break;
653 default:
654 zend_value_error("Option \"%c\" is not supported", c);
655 return false;
656 }
657 }
658 if (option != NULL) *option|=optm;
659 }
660 return true;
661}
662/* }}} */
663
664
665/*
666 * Callbacks for named subpatterns
667 */
668
669/* {{{ struct mb_regex_groups_iter_args */
670typedef struct mb_regex_groups_iter_args {
671 zval *groups;
672 char *search_str;
673 size_t search_len;
674 OnigRegion *region;
675} mb_regex_groups_iter_args;
676/* }}} */
677
678/* {{{ mb_regex_groups_iter */
679static int
680mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngroup_num, int* group_nums, regex_t* reg, void* parg)
681{
682 mb_regex_groups_iter_args *args = (mb_regex_groups_iter_args *) parg;
683 int gn, beg, end;
684
685 /*
686 * In case of duplicate groups, keep only the last succeeding one
687 * to be consistent with preg_match with the PCRE_DUPNAMES option.
688 */
689 gn = onig_name_to_backref_number(reg, name, name_end, args->region);
690 beg = args->region->beg[gn];
691 end = args->region->end[gn];
692 if (beg >= 0 && beg < end && ((size_t)end <= args->search_len)) {
693 add_assoc_stringl_ex(args->groups, (char *)name, name_end - name, &args->search_str[beg], end - beg);
694 } else {
695 add_assoc_bool_ex(args->groups, (char *)name, name_end - name, 0);
696 }
697
698 return 0;
699}
700/* }}} */
701
702/*
703 * Helper for _php_mb_regex_ereg_replace_exec
704 */
705/* {{{ mb_regex_substitute */
706static inline void mb_regex_substitute(
707 smart_str *pbuf,
708 const char *subject,
709 size_t subject_len,
710 char *replace,
711 size_t replace_len,
712 php_mb_regex_t *regexp,
713 OnigRegion *regs,
714 const mbfl_encoding *enc
715) {
716 char *p, *sp, *eos;
717 int no; /* bakreference group number */
718 int clen; /* byte-length of the current character */
719
720 p = replace;
721 eos = replace + replace_len;
722
723 while (p < eos) {
724 clen = (int) php_mb_mbchar_bytes(p, enc);
725 if (clen != 1 || p == eos || p[0] != '\\') {
726 /* skip anything that's not an ascii backslash */
727 smart_str_appendl(pbuf, p, clen);
728 p += clen;
729 continue;
730 }
731 sp = p; /* save position */
732 clen = (int) php_mb_mbchar_bytes(++p, enc);
733 if (clen != 1 || p == eos) {
734 /* skip backslash followed by multibyte char */
735 smart_str_appendl(pbuf, sp, p - sp);
736 continue;
737 }
738 no = -1;
739 switch (p[0]) {
740 case '0':
741 no = 0;
742 p++;
743 break;
744 case '1': case '2': case '3': case '4':
745 case '5': case '6': case '7': case '8': case '9':
746 if (!onig_noname_group_capture_is_active(regexp)) {
747 /*
748 * FIXME:
749 * Oniguruma throws a compile error if numbered backrefs are used with named groups in the pattern.
750 * For now we just ignore them, but in the future we might want to raise a warning
751 * and abort the whole replace operation.
752 */
753 p++;
754 smart_str_appendl(pbuf, sp, p - sp);
755 continue;
756 }
757 no = p[0] - '0';
758 p++;
759 break;
760 case 'k':
761 {
762 clen = (int) php_mb_mbchar_bytes(++p, enc);
763 if (clen != 1 || p == eos || (p[0] != '<' && p[0] != '\'')) {
764 /* not a backref delimiter */
765 p += clen;
766 smart_str_appendl(pbuf, sp, p - sp);
767 continue;
768 }
769 /* try to consume everything until next delimiter */
770 char delim = p[0] == '<' ? '>' : '\'';
771 char *name, *name_end;
772 char maybe_num = 1;
773 name_end = name = p + 1;
774 while (name_end < eos) {
775 clen = (int) php_mb_mbchar_bytes(name_end, enc);
776 if (clen != 1) {
777 name_end += clen;
778 maybe_num = 0;
779 continue;
780 }
781 if (name_end[0] == delim) break;
782 if (maybe_num && !isdigit(name_end[0])) maybe_num = 0;
783 name_end++;
784 }
785 p = name_end + 1;
786 if (name_end - name < 1 || name_end >= eos) {
787 /* the backref was empty or we failed to find the end delimiter */
788 smart_str_appendl(pbuf, sp, p - sp);
789 continue;
790 }
791 /* we have either a name or a number */
792 if (maybe_num) {
793 if (!onig_noname_group_capture_is_active(regexp)) {
794 /* see above note on mixing numbered & named backrefs */
795 smart_str_appendl(pbuf, sp, p - sp);
796 continue;
797 }
798 if (name_end - name == 1) {
799 no = name[0] - '0';
800 break;
801 }
802 if (name[0] == '0') {
803 /* 01 is not a valid number */
804 break;
805 }
806 no = (int) strtoul(name, NULL, 10);
807 break;
808 }
809 no = onig_name_to_backref_number(regexp, (OnigUChar *)name, (OnigUChar *)name_end, regs);
810 break;
811 }
812 default:
813 /* We're not treating \ as an escape character and will interpret something like
814 * \\1 as \ followed by \1, rather than \\ followed by 1. This is because this
815 * function has not supported escaping of backslashes historically. */
816 smart_str_appendl(pbuf, sp, p - sp);
817 continue;
818 }
819 if (no < 0 || no >= regs->num_regs) {
820 /* invalid group number reference, keep the escape sequence in the output */
821 smart_str_appendl(pbuf, sp, p - sp);
822 continue;
823 }
824 if (regs->beg[no] >= 0 && regs->beg[no] < regs->end[no] && (size_t)regs->end[no] <= subject_len) {
825 smart_str_appendl(pbuf, subject + regs->beg[no], regs->end[no] - regs->beg[no]);
826 }
827 }
828
829 if (p < eos) {
830 smart_str_appendl(pbuf, p, eos - p);
831 }
832}
833/* }}} */
834
835/*
836 * php functions
837 */
838
839/* {{{ Returns the current encoding for regex as a string. */
/*
 * NOTE(review): the function header that belongs in front of this body
 * (embedded line 840) is missing from this copy, as are several statements
 * inside it; each gap is marked below. From the visible code: with no
 * encoding argument the current regex encoding name is looked up, otherwise
 * the argument is installed as the current encoding and a ValueError is
 * raised if it is not recognised.
 */
841{
842 char *encoding = NULL;
843 size_t encoding_len;
844
845 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!", &encoding, &encoding_len) == FAILURE) {
/* NOTE(review): embedded line 846 missing - presumably the early return on
 * argument parsing failure. */
847 }
848
849 if (!encoding) {
850 const char *retval = php_mb_regex_get_mbctype();
/* NOTE(review): embedded lines 851 and 853 missing - retval is fetched but
 * never used here, so the statements returning it were presumably lost. */
852
854 } else {
855 if (php_mb_regex_set_mbctype(encoding) == FAILURE) {
856 zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", encoding);
/* NOTE(review): embedded line 857 missing - presumably the return after the
 * error was raised. */
858 }
859
860 /* TODO Make function return previous encoding? */
/* NOTE(review): embedded line 861 missing - presumably the success return. */
862 }
863}
864/* }}} */
865
866/* {{{ _php_mb_onig_search */
867static int _php_mb_onig_search(regex_t* reg, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start,
868 const OnigUChar* range, OnigRegion* region, OnigOptionType option) {
869 OnigMatchParam *mp = onig_new_match_param();
870 int err;
871 onig_initialize_match_param(mp);
872 if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) {
873 onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
874 }
875 if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_retry_limit))) {
876 onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int)MBSTRG(regex_retry_limit));
877 }
878 /* search */
879 err = onig_search_with_param(reg, str, end, start, range, region, option, mp);
880 onig_free_match_param(mp);
881 return err;
882}
883/* }}} */
884
885
886/* {{{ _php_mb_regex_ereg_exec */
/*
 * Shared implementation of the regex match functions: compiles the pattern
 * with the default options (plus IGNORECASE when icase is non-zero), searches
 * the whole subject once and, if a third argument was passed, fills it with
 * the numbered captures followed by the named ones. Captures that did not
 * match, are empty, or fall outside the subject are stored as false.
 *
 * NOTE(review): several statements are missing from this copy (the embedded
 * numbering has gaps); each gap is marked below. Most look like the
 * return-value statements of the individual exit paths.
 */
887static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
888{
889 zval *array = NULL;
890 char *arg_pattern, *string;
891 size_t arg_pattern_len, string_len;
892 php_mb_regex_t *re;
893 OnigRegion *regs = NULL;
894 int i, beg, end;
895 OnigOptionType options;
896 char *str;
897
898 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_pattern, &arg_pattern_len, &string, &string_len, &array) == FAILURE) {
/* NOTE(review): embedded line 899 missing. */
900 }
901
902 if (arg_pattern_len == 0) {
/* NOTE(review): embedded lines 903-904 missing - the handling of an empty
 * pattern is not visible. */
905 }
906
907 if (array != NULL) {
908 array = zend_try_array_init(array);
909 if (!array) {
/* NOTE(review): embedded line 910 missing. */
911 }
912 }
913
/* NOTE(review): embedded line 914 missing - the opening of the call whose
 * arguments follow was lost; judging by the arguments it validated the
 * subject against the current regex encoding. */
915 string,
916 string_len,
917 php_mb_regex_get_mbctype_encoding()
918 )) {
/* NOTE(review): embedded line 919 missing. */
920 }
921
922 options = MBREX(regex_default_options);
923 if (icase) {
924 options |= ONIG_OPTION_IGNORECASE;
925 }
926
927 re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(regex_default_syntax));
928 if (re == NULL) {
/* NOTE(review): embedded line 929 missing. */
930 goto out;
931 }
932
933 regs = onig_region_new();
934
935 /* actually execute the regular expression */
936 if (_php_mb_onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
/* NOTE(review): embedded line 937 missing. */
938 goto out;
939 }
940
941 str = string;
942 if (array != NULL) {
/* numbered groups first, index 0 being the whole match */
943 for (i = 0; i < regs->num_regs; i++) {
944 beg = regs->beg[i];
945 end = regs->end[i];
946 if (beg >= 0 && beg < end && (size_t)end <= string_len) {
947 add_index_stringl(array, i, (char *)&str[beg], end - beg);
948 } else {
949 add_index_bool(array, i, 0);
950 }
951 }
952
/* then the named groups, keyed by name */
953 if (onig_number_of_names(re) > 0) {
954 mb_regex_groups_iter_args args = {array, string, string_len, regs};
955 onig_foreach_name(re, mb_regex_groups_iter, &args);
956 }
957 }
958
/* NOTE(review): embedded line 959 missing - presumably the success return
 * value is set here. */
960out:
961 if (regs != NULL) {
962 onig_region_free(regs, 1);
963 }
964}
965/* }}} */
966
967/* {{{ Regular expression match for multibyte string */
/* NOTE(review): the function header (embedded line 968) is missing from this
 * copy. The body runs the shared matcher case-sensitively (icase = 0). */
969{
970 _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
971}
972/* }}} */
973
974/* {{{ Case-insensitive regular expression match for multibyte string */
/* NOTE(review): the function header (embedded line 975) is missing from this
 * copy. The body runs the shared matcher case-insensitively (icase = 1). */
976{
977 _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
978}
979/* }}} */
980
981/* {{{ _php_mb_regex_ereg_replace_exec */
/*
 * Shared implementation of the regex replace functions.
 *
 * options      flags OR-ed into the ones taken from the option string (or
 *              into the defaults when no option string is given)
 * is_callable  non-zero: the second argument is a callback that receives the
 *              array of captures and whose result is used as replacement;
 *              zero: the second argument is a replacement template expanded
 *              by mb_regex_substitute()
 *
 * The subject is scanned left to right; text between matches is copied to
 * the output unchanged. After an empty match one byte is copied and the scan
 * position advances by one, so the loop always makes progress.
 *
 * NOTE(review): a few statements are missing from this copy (the embedded
 * numbering has gaps); each gap is marked below.
 */
982static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
983{
984 char *arg_pattern;
985 size_t arg_pattern_len;
986
987 char *replace;
988 size_t replace_len;
989
990 zend_fcall_info arg_replace_fci;
991 zend_fcall_info_cache arg_replace_fci_cache;
992
993 char *string;
994 size_t string_len;
995
996 php_mb_regex_t *re;
997 OnigSyntaxType *syntax;
998 OnigRegion *regs = NULL;
999 smart_str out_buf = {0};
1000 smart_str eval_buf = {0};
1001 smart_str *pbuf;
1002 int err, n;
1003 OnigUChar *pos;
1004 OnigUChar *string_lim;
1005 char *description = NULL;
1006
1007 const mbfl_encoding *enc = php_mb_regex_get_mbctype_encoding();
1008 ZEND_ASSERT(enc != NULL);
1009
1010 {
1011 char *option_str = NULL;
1012 size_t option_str_len = 0;
1013
1014 if (!is_callable) {
1015 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|s!",
1016 &arg_pattern, &arg_pattern_len,
1017 &replace, &replace_len,
1018 &string, &string_len,
1019 &option_str, &option_str_len) == FAILURE) {
1020 RETURN_THROWS();
1021 }
1022 } else {
1023 if (zend_parse_parameters(ZEND_NUM_ARGS(), "sfs|s!",
1024 &arg_pattern, &arg_pattern_len,
1025 &arg_replace_fci, &arg_replace_fci_cache,
1026 &string, &string_len,
1027 &option_str, &option_str_len) == FAILURE) {
1028 RETURN_THROWS();
1029 }
1030 }
1031
/* a subject that is not valid in the current encoding yields NULL */
1032 if (!php_mb_check_encoding(string, string_len, enc)) {
1033 RETURN_NULL();
1034 }
1035
1036 if (option_str != NULL) {
1037 /* Initialize option and in case of failure it means there is a value error */
1038 if (!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax)) {
1039 RETURN_THROWS();
1040 }
1041 } else {
1042 options |= MBREX(regex_default_options);
1043 syntax = MBREX(regex_default_syntax);
1044 }
1045 }
1046
1047 /* create regex pattern buffer */
1048 re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, syntax);
1049 if (re == NULL) {
/* NOTE(review): embedded line 1050 missing - the return for a pattern that
 * failed to compile is not visible. */
1051 }
1052
/* template expansion goes straight into out_buf; in callback mode pbuf
 * points at the separate eval_buf instead */
1053 if (is_callable) {
1054 pbuf = &eval_buf;
1055 description = zend_make_compiled_string_description("mbregex replace");
1056 } else {
1057 pbuf = &out_buf;
1058 description = NULL;
1059 }
1060
1061 /* do the actual work */
1062 err = 0;
1063 pos = (OnigUChar *)string;
1064 string_lim = (OnigUChar*)(string + string_len);
1065 regs = onig_region_new();
1066 while (err >= 0) {
1067 err = _php_mb_onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
/* err <= -2 is treated as a real search error, as opposed to the "no match"
 * result handled in the else branch further down */
1068 if (err <= -2) {
1069 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1070 onig_error_code_to_str(err_str, err);
1071 php_error_docref(NULL, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
1072 break;
1073 }
1074 if (err >= 0) {
1075 /* copy the part of the string before the match */
1076 smart_str_appendl(&out_buf, (char *)pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
1077
1078 if (!is_callable) {
1079 mb_regex_substitute(pbuf, string, string_len, replace, replace_len, re, regs, enc);
1080 }
1081
1082 if (is_callable) {
1083 zval args[1];
1084 zval subpats, retval;
1085 int i;
1086
/* build the array of captures handed to the callback */
1087 array_init(&subpats);
1088 for (i = 0; i < regs->num_regs; i++) {
1089 add_next_index_stringl(&subpats, string + regs->beg[i], regs->end[i] - regs->beg[i]);
1090 }
1091 if (onig_number_of_names(re) > 0) {
1092 mb_regex_groups_iter_args args = {&subpats, string, string_len, regs};
1093 onig_foreach_name(re, mb_regex_groups_iter, &args);
1094 }
1095
1096 ZVAL_COPY_VALUE(&args[0], &subpats);
1097 /* null terminate buffer */
1098 smart_str_0(&eval_buf);
1099
1100 arg_replace_fci.param_count = 1;
1101 arg_replace_fci.params = args;
1102 arg_replace_fci.retval = &retval;
1103 if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache) == SUCCESS &&
1104 !Z_ISUNDEF(retval)) {
/* NOTE(review): embedded line 1105 missing - retval is read as a string on
 * the next line, so a string conversion was presumably lost here. */
1106 smart_str_appendl(&out_buf, Z_STRVAL(retval), Z_STRLEN(retval));
1107 smart_str_free(&eval_buf);
/* NOTE(review): embedded line 1108 missing - presumably the release of
 * retval. */
1109 } else {
1110 if (!EG(exception)) {
1111 zend_throw_error(NULL, "Unable to call custom replacement function");
1112 zval_ptr_dtor(&subpats);
/* NOTE(review): this exit does not pass through the cleanup at the end of
 * the function, so regs, description and out_buf look leaked on this path -
 * confirm. */
1113 RETURN_THROWS();
1114 }
1115 }
1116 zval_ptr_dtor(&subpats);
1117 }
1118
/* continue after the match; after an empty match copy one byte and step
 * over it so the scan cannot stall */
1119 n = regs->end[0];
1120 if ((pos - (OnigUChar *)string) < n) {
1121 pos = (OnigUChar *)string + n;
1122 } else {
1123 if (pos < string_lim) {
1124 smart_str_appendl(&out_buf, (char *)pos, 1);
1125 }
1126 pos++;
1127 }
1128 } else { /* nomatch */
1129 /* stick that last bit of string on our output */
1130 if (string_lim - pos > 0) {
1131 smart_str_appendl(&out_buf, (char *)pos, string_lim - pos);
1132 }
1133 }
/* second argument 0: the region contents are reset for the next search, the
 * region object itself is freed after the loop */
1134 onig_region_free(regs, 0);
1135 }
1136
1137 if (description) {
1138 efree(description);
1139 }
1140 if (regs != NULL) {
1141 onig_region_free(regs, 1);
1142 }
1143 smart_str_free(&eval_buf);
1144
1145 if (err <= -2) {
1146 smart_str_free(&out_buf);
/* NOTE(review): embedded line 1147 missing - the return for the error case
 * is not visible; without it control falls through to the extraction of the
 * buffer that was just freed. */
1148 }
1149
1150 RETURN_STR(smart_str_extract(&out_buf));
1151}
1152/* }}} */
1153
1154/* {{{ Replace regular expression for multibyte string */
/* NOTE(review): the function header (embedded line 1155) is missing from this
 * copy. The body runs the shared replace routine with no extra options and a
 * template replacement. */
1156{
1157 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1158}
1159/* }}} */
1160
1161/* {{{ Case insensitive replace regular expression for multibyte string */
/* NOTE(review): the function header (embedded line 1162) is missing from this
 * copy. The body runs the shared replace routine with ONIG_OPTION_IGNORECASE
 * and a template replacement. */
1163{
1164 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
1165}
1166/* }}} */
1167
1168/* {{{ regular expression for multibyte string using replacement callback */
/* NOTE(review): the function header (embedded line 1169) is missing from this
 * copy. The body runs the shared replace routine in callback mode
 * (is_callable = 1) with no extra options. */
1170{
1171 _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1172}
1173/* }}} */
1174
1175/* {{{ split multibyte string into array by regular expression */
/*
 * Splits the subject at every match of the pattern and appends the pieces to
 * return_value. A positive count limits the number of pieces: it is
 * decremented once up front so that at most count - 1 splits are made and the
 * remainder of the subject becomes the last piece. The default of -1 never
 * reaches zero, so the number of pieces is unlimited.
 *
 * NOTE(review): the function header (embedded line 1176) and several
 * statements are missing from this copy; each gap is marked below.
 */
1177{
1178 char *arg_pattern;
1179 size_t arg_pattern_len;
1180 php_mb_regex_t *re;
1181 OnigRegion *regs = NULL;
1182 char *string;
1183 OnigUChar *pos, *chunk_pos;
1184 size_t string_len;
1185
1186 int err;
1187 zend_long count = -1;
1188
1189 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1190 RETURN_THROWS();
1191 }
1192
1193 if (count > 0) {
1194 count--;
1195 }
1196
1197 if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
/* NOTE(review): embedded line 1198 missing - the return for an invalidly
 * encoded subject is not visible. */
1199 }
1200
1201 /* create regex pattern buffer */
1202 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(regex_default_syntax))) == NULL) {
/* NOTE(review): embedded line 1203 missing - the return for a pattern that
 * failed to compile is not visible. */
1204 }
1205
/* NOTE(review): embedded line 1206 missing - return_value is appended to
 * below, so its initialisation as an array was presumably lost here. */
1207
1208 chunk_pos = pos = (OnigUChar *)string;
1209 err = 0;
1210 regs = onig_region_new();
1211 /* churn through str, generating array entries as we go */
1212 while (count != 0 && (size_t)(pos - (OnigUChar *)string) < string_len) {
1213 size_t beg, end;
1214 err = _php_mb_onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1215 if (err < 0) {
1216 break;
1217 }
1218 beg = regs->beg[0], end = regs->end[0];
1219 /* add it to the array */
1220 if ((size_t)(pos - (OnigUChar *)string) < end) {
1221 if (beg < string_len && beg >= (size_t)(chunk_pos - (OnigUChar *)string)) {
1222 add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos));
1223 --count;
1224 } else {
/* a match starting outside the subject or before the current chunk is
 * reported as a search failure */
1225 err = -2;
1226 break;
1227 }
1228 /* point at our new starting point */
1229 chunk_pos = pos = (OnigUChar *)string + end;
1230 } else {
/* the match ends at or before pos: step one byte forward so the scan
 * makes progress */
1231 pos++;
1232 }
1233 onig_region_free(regs, 0);
1234 }
1235
1236 onig_region_free(regs, 1);
1237
1238 /* see if we encountered an error */
1239 // ToDo investigate if this can actually/should happen ...
1240 if (err <= -2) {
1241 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1242 onig_error_code_to_str(err_str, err);
1243 php_error_docref(NULL, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
/* NOTE(review): embedded lines 1244-1245 missing - the cleanup and return of
 * the error path are not visible. */
1246 }
1247
1248 /* otherwise we just have one last element to add to the array */
1249 if ((OnigUChar *)(string + string_len) > chunk_pos) {
1250 size_t n = ((OnigUChar *)(string + string_len) - chunk_pos);
1251 add_next_index_stringl(return_value, (char *)chunk_pos, n);
1252 } else {
/* NOTE(review): embedded line 1253 missing - what is appended when nothing
 * is left after the last separator is not visible. */
1254 }
1255}
1256/* }}} */
1257
1258/* {{{ Regular expression match for multibyte string */
/*
 * Tests whether `arg_pattern` matches `string` starting at its first byte.
 * Parameters ("ss|s!"): pattern, subject string, optional nullable options string.
 *
 * NOTE(review): this listing omits source lines 1259, 1294, 1298, 1313 and 1315: the function
 * header (presumably PHP_FUNCTION(mb_ereg_match)) and the bodies of the encoding-check,
 * compile-failure and final result branches. Confirm against php-src.
 */
1260{
1261 char *arg_pattern;
1262 size_t arg_pattern_len;
1263
1264 char *string;
1265 size_t string_len;
1266
1267 php_mb_regex_t *re;
1268 OnigSyntaxType *syntax;
1269 OnigOptionType option = 0;
1270 int err;
1271 OnigMatchParam *mp;
1272
1273 {
1274 char *option_str = NULL;
1275 size_t option_str_len = 0;
1276
1277 if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|s!",
1278 &arg_pattern, &arg_pattern_len, &string, &string_len,
1279 &option_str, &option_str_len)==FAILURE) {
1280 RETURN_THROWS();
1281 }
1282
/* An explicit options string replaces the defaults; a failed parse ends the call. */
1283 if (option_str != NULL) {
1284 if(!_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax)) {
1285 RETURN_THROWS();
1286 }
1287 } else {
1288 option |= MBREX(regex_default_options);
1289 syntax = MBREX(regex_default_syntax);
1290 }
1291 }
1292
1293 if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) {
1295 }
1296
1297 if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
1299 }
1300
/* The stack and retry limits are applied only when the setting is positive and below UINT_MAX,
 * since the value is cast to unsigned int. */
1301 mp = onig_new_match_param();
1302 onig_initialize_match_param(mp);
1303 if (MBSTRG(regex_stack_limit) > 0 && MBSTRG(regex_stack_limit) < UINT_MAX) {
1304 onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
1305 }
1306 if (MBSTRG(regex_retry_limit) > 0 && MBSTRG(regex_retry_limit) < UINT_MAX) {
1307 onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int)MBSTRG(regex_retry_limit));
1308 }
1309 /* match */
/* The match position is the start of the string, and no region is requested (NULL). */
1310 err = onig_match_with_param(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0, mp);
1311 onig_free_match_param(mp);
/* Non-negative err means a match; both branch bodies are missing from this listing. */
1312 if (err >= 0) {
1314 } else {
1316 }
1317}
1318/* }}} */
1319
1320/* regex search */
1321/* {{{ _php_mb_regex_ereg_search_exec */
/*
 * Shared search routine; the wrappers that follow it in this file call it with mode 0, 1 or 2.
 * Parameters ("|s!s!"): optional nullable pattern and options string.
 * It searches the string stored in MBREX(search_str) from MBREX(search_pos), using the pattern
 * in MBREX(search_re) (replaced first when a pattern argument is given), and leaves the match
 * registers in MBREX(search_regs).
 *   mode 1: reads the bounds of register 0 (the lines that use them are missing from listing).
 *   mode 2: adds each register's substring to return_value under its register index, then
 *           visits named groups via mb_regex_groups_iter.
 *   other:  `default` branch (its body is missing from this listing).
 *
 * NOTE(review): this listing omits source lines 1353, 1380, 1385, 1389, 1392-1393, 1396, 1404,
 * 1409 and 1418. Confirm against php-src.
 */
1322static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1323{
1324 char *arg_pattern = NULL, *arg_options = NULL;
1325 size_t arg_pattern_len, arg_options_len;
1326 int err;
1327 size_t n, i, pos, len;
1328 /* Stored as int* in the OnigRegion struct */
1329 int beg, end;
1330 OnigOptionType option = 0;
1331 OnigUChar *str;
1332 OnigSyntaxType *syntax;
1333
1334 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!s!", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1335 RETURN_THROWS();
1336 }
1337
/* NOTE(review): the result of _php_mb_regex_init_options is ignored here, whereas other callers
 * in this file test it and RETURN_THROWS() on failure. Confirm whether continuing after a
 * failed options parse is intended. */
1338 if (arg_options) {
1339 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax);
1340 } else {
1341 option |= MBREX(regex_default_options);
1342 syntax = MBREX(regex_default_syntax);
1343 }
1344
/* Drop registers left over from a previous search before starting a new one. */
1345 if (MBREX(search_regs)) {
1346 onig_region_free(MBREX(search_regs), 1);
1347 MBREX(search_regs) = NULL;
1348 }
1349
1350 if (arg_pattern) {
1351 /* create regex pattern buffer */
1352 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
1354 }
1355 }
1356
1357 pos = MBREX(search_pos);
1358 str = NULL;
1359 len = 0;
1360 if (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING){
1361 str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1362 len = Z_STRLEN(MBREX(search_str));
1363 }
1364
1365 if (MBREX(search_re) == NULL) {
1366 zend_throw_error(NULL, "No pattern was provided");
1367 RETURN_THROWS();
1368 }
1369
1370 if (str == NULL) {
1371 zend_throw_error(NULL, "No string was provided");
1372 RETURN_THROWS();
1373 }
1374
1375 MBREX(search_regs) = onig_region_new();
1376
1377 err = _php_mb_onig_search(MBREX(search_re), str, str + len, str + pos, str + len, MBREX(search_regs), 0);
/* No match: move the stored position to the end of the string. */
1378 if (err == ONIG_MISMATCH) {
1379 MBREX(search_pos) = len;
1381 } else if (err <= -2) {
1382 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1383 onig_error_code_to_str(err_str, err);
1384 php_error_docref(NULL, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1386 } else {
1387 switch (mode) {
1388 case 1:
1390 beg = MBREX(search_regs)->beg[0];
1391 end = MBREX(search_regs)->end[0];
1394 break;
1395 case 2:
1397 n = MBREX(search_regs)->num_regs;
1398 for (i = 0; i < n; i++) {
1399 beg = MBREX(search_regs)->beg[i];
1400 end = MBREX(search_regs)->end[i];
/* Only registers whose bounds lie inside the string are copied; the else body is missing. */
1401 if (beg >= 0 && beg <= end && end <= len) {
1402 add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1403 } else {
1405 }
1406 }
1407 if (onig_number_of_names(MBREX(search_re)) > 0) {
/* NOTE(review): the first initializer (source line 1409) is missing from this listing. */
1408 mb_regex_groups_iter_args args = {
1410 Z_STRVAL(MBREX(search_str)),
1411 Z_STRLEN(MBREX(search_str)),
1412 MBREX(search_regs)
1413 };
1414 onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
1415 }
1416 break;
1417 default:
1419 break;
1420 }
/* Continue the next search from the end of this match, or one past pos when the match ended
 * before pos. */
1421 end = MBREX(search_regs)->end[0];
1422 if (pos <= end) {
1423 MBREX(search_pos) = end;
1424 } else {
1425 MBREX(search_pos) = pos + 1;
1426 }
1427 }
1428
/* A failed search (mismatch or error) leaves no registers behind. */
1429 if (err < 0) {
1430 onig_region_free(MBREX(search_regs), 1);
1431 MBREX(search_regs) = (OnigRegion *)NULL;
1432 }
1433}
1434/* }}} */
1435
1436/* {{{ Regular expression search for multibyte string */
/* NOTE(review): source line 1437 (the function header) is absent from this listing;
 * presumably PHP_FUNCTION(mb_ereg_search) - confirm against php-src. */
1438{
/* Mode 0 takes the `default` branch of the shared search routine's mode switch. */
1439 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1440}
1441/* }}} */
1442
1443/* {{{ Regular expression search for multibyte string */
/* NOTE(review): source line 1444 (the function header) is absent from this listing;
 * presumably PHP_FUNCTION(mb_ereg_search_pos) - confirm against php-src. */
1445{
/* Mode 1 makes the shared search routine work from the bounds of match register 0. */
1446 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1447}
1448/* }}} */
1449
1450/* {{{ Regular expression search for multibyte string */
/* NOTE(review): source line 1451 (the function header) is absent from this listing;
 * presumably PHP_FUNCTION(mb_ereg_search_regs) - confirm against php-src. */
1452{
/* Mode 2 makes the shared search routine add every match register's substring to the result. */
1453 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1454}
1455/* }}} */
1456
1457/* {{{ Initialize string and regular expression for search. */
/*
 * Stores the subject string (and optionally a compiled pattern) in the MBREX(...) search state
 * and resets the search position and registers.
 * Parameters ("S|s!s!"): subject string, optional nullable pattern, optional nullable options.
 *
 * NOTE(review): this listing omits source lines 1458, 1471, 1486, 1498 and 1501: the function
 * header (presumably PHP_FUNCTION(mb_ereg_search_init)), the statement preceding the
 * RETURN_THROWS() for an empty pattern, and the bodies or results of later branches.
 */
1459{
1460 zend_string *arg_str;
1461 char *arg_pattern = NULL, *arg_options = NULL;
1462 size_t arg_pattern_len = 0, arg_options_len = 0;
1463 OnigSyntaxType *syntax = NULL;
1464 OnigOptionType option;
1465
1466 if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|s!s!", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1467 RETURN_THROWS();
1468 }
1469
/* A pattern that is given but empty is rejected. */
1470 if (arg_pattern && arg_pattern_len == 0) {
1472 RETURN_THROWS();
1473 }
1474
/* NOTE(review): the result of _php_mb_regex_init_options is ignored here, whereas other callers
 * in this file test it and RETURN_THROWS() on failure. Confirm whether that is intended. */
1475 if (arg_options) {
1476 option = 0;
1477 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax);
1478 } else {
1479 option = MBREX(regex_default_options);
1480 syntax = MBREX(regex_default_syntax);
1481 }
1482
1483 if (arg_pattern) {
1484 /* create regex pattern buffer */
1485 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) {
1487 }
1488 }
1489
/* Release the previously stored subject before taking a new reference to arg_str. */
1490 if (!Z_ISNULL(MBREX(search_str))) {
1491 zval_ptr_dtor(&MBREX(search_str));
1492 }
1493
1494 ZVAL_STR_COPY(&MBREX(search_str), arg_str);
1495
/* Validly encoded subject: start at offset 0. Otherwise the position is set to the string
 * length (presumably so that later searches find nothing - confirm). */
1496 if (php_mb_check_encoding(ZSTR_VAL(arg_str), ZSTR_LEN(arg_str), php_mb_regex_get_mbctype_encoding())) {
1497 MBREX(search_pos) = 0;
1499 } else {
1500 MBREX(search_pos) = ZSTR_LEN(arg_str);
1502 }
1503
1504 if (MBREX(search_regs) != NULL) {
1505 onig_region_free(MBREX(search_regs), 1);
1506 MBREX(search_regs) = NULL;
1507 }
1508}
1509/* }}} */
1510
1511/* {{{ Get matched substring of the last time */
/*
 * Rebuilds the match array from the registers and subject string kept in the MBREX(...) search
 * state: one entry per register, plus named groups via mb_regex_groups_iter.
 *
 * NOTE(review): this listing omits source lines 1512, 1519, 1524, 1535, 1540 and 1549: the
 * function header (presumably PHP_FUNCTION(mb_ereg_search_getregs)), the condition guarding the
 * first RETURN_THROWS(), the first struct initializer, and several branch bodies.
 */
1513{
1514 size_t n, i, len;
1515 /* Stored as int* in the OnigRegion struct */
1516 int beg, end;
1517 OnigUChar *str;
1518
1520 RETURN_THROWS();
1521 }
1522
/* Registers exist only after a successful search; a string subject is also required. */
1523 if (MBREX(search_regs) != NULL && Z_TYPE(MBREX(search_str)) == IS_STRING) {
1525
1526 str = (OnigUChar *)Z_STRVAL(MBREX(search_str));
1527 len = Z_STRLEN(MBREX(search_str));
1528 n = MBREX(search_regs)->num_regs;
1529 for (i = 0; i < n; i++) {
1530 beg = MBREX(search_regs)->beg[i];
1531 end = MBREX(search_regs)->end[i];
/* Only registers whose bounds lie inside the string are copied; the else body is missing. */
1532 if (beg >= 0 && beg <= end && end <= len) {
1533 add_index_stringl(return_value, i, (char *)&str[beg], end - beg);
1534 } else {
1536 }
1537 }
1538 if (onig_number_of_names(MBREX(search_re)) > 0) {
/* NOTE(review): the first initializer (source line 1540) is missing from this listing. */
1539 mb_regex_groups_iter_args args = {
1541 Z_STRVAL(MBREX(search_str)),
1542 len,
1543 MBREX(search_regs)
1544 };
1545 onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
1546 }
1547 } else {
1548 // TODO This seems to be some logical error, promote to Error
1550 }
1551}
1552/* }}} */
1553
1554/* {{{ Get search start position */
/* NOTE(review): source lines 1555 and 1557 are absent from this listing: the function header
 * (presumably PHP_FUNCTION(mb_ereg_search_getpos)) and the condition that guards the
 * RETURN_THROWS() below. Confirm against php-src. */
1556{
1558 RETURN_THROWS();
1559 }
1560
/* Reports the offset stored in the MBREX search state as an integer. */
1561 RETVAL_LONG(MBREX(search_pos));
1562}
1563/* }}} */
1564
1565/* {{{ Set search start position */
/*
 * Stores a new search offset in MBREX(search_pos).
 * Parameter ("l"): position; a negative value counts back from the end of the stored subject.
 *
 * NOTE(review): this listing omits source lines 1566 and 1586: the function header (presumably
 * PHP_FUNCTION(mb_ereg_search_setpos)) and the final statement after the TODO below.
 */
1567{
1568 zend_long position;
1569
1570 if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &position) == FAILURE) {
1571 RETURN_THROWS();
1572 }
1573
1574 /* Accept negative position if length of search string can be determined */
1575 if ((position < 0) && (!Z_ISUNDEF(MBREX(search_str))) && (Z_TYPE(MBREX(search_str)) == IS_STRING)) {
1576 position += Z_STRLEN(MBREX(search_str));
1577 }
1578
/* A position that is still negative is always rejected. The upper bound (the string length,
 * inclusive) is enforced only when a string subject is currently stored. */
1579 if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) {
1580 zend_argument_value_error(1, "is out of range");
1581 RETURN_THROWS();
1582 }
1583
1584 MBREX(search_pos) = position;
1585 // TODO Return void
1587}
1588/* }}} */
1589
1590/* {{{ php_mb_regex_set_options */
1591static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax)
1592{
1593 if (prev_options != NULL) {
1594 *prev_options = MBREX(regex_default_options);
1595 }
1596 if (prev_syntax != NULL) {
1597 *prev_syntax = MBREX(regex_default_syntax);
1598 }
1599 MBREX(regex_default_options) = options;
1600 MBREX(regex_default_syntax) = syntax;
1601}
1602/* }}} */
1603
1604/* {{{ Set or get the default options for mbregex functions */
/*
 * With an options string: parses it, installs the result as the new defaults, and formats the
 * PREVIOUS defaults into `buf`. Without one: formats the current defaults into `buf`.
 *
 * NOTE(review): this listing omits source lines 1605, 1613 and 1632: the function header
 * (presumably PHP_FUNCTION(mb_regex_set_options)), the opening of the parameter-parsing call
 * whose tail is visible below, and the statement that follows the formatting call
 * (presumably returning `buf` - confirm against php-src).
 */
1606{
1607 OnigOptionType opt, prev_opt;
1608 OnigSyntaxType *syntax, *prev_syntax;
1609 char *string = NULL;
1610 size_t string_len;
1611 char buf[16];
1612
1614 &string, &string_len) == FAILURE) {
1615 RETURN_THROWS();
1616 }
1617 if (string != NULL) {
1618 opt = 0;
1619 syntax = NULL;
1620 if(!_php_mb_regex_init_options(string, string_len, &opt, &syntax)) {
1621 RETURN_THROWS();
1622 }
1623 _php_mb_regex_set_options(opt, syntax, &prev_opt, &prev_syntax);
/* From here on opt/syntax hold the settings that were just replaced. */
1624 opt = prev_opt;
1625 syntax = prev_syntax;
1626 } else {
1627 opt = MBREX(regex_default_options);
1628 syntax = MBREX(regex_default_syntax);
1629 }
1630 _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1631
1633}
1634/* }}} */
1635
1636#endif /* HAVE_MBREGEX */
size_t len
Definition apprentice.c:174
bool exception
Definition assert.c:30
is_callable(mixed $value, bool $syntax_only=false, &$callable_name=null)
count(Countable|array $value, int $mode=COUNT_NORMAL)
zend_long n
Definition ffi.c:4979
char * err
Definition ffi.c:3029
buf start
Definition ffi.c:4687
zend_ffi_ctype_name_buf buf
Definition ffi.c:4685
char * mode
#define NULL
Definition gdcache.h:45
#define SUCCESS
Definition hash_sha3.c:261
PHPAPI ZEND_COLD void php_error_docref(const char *docref, int type, const char *format,...)
Definition main.c:1173
const mbfl_encoding mbfl_encoding_utf8
const mbfl_encoding * mbfl_name2encoding(const char *name)
MBSTRING_API size_t php_mb_mbchar_bytes(const char *s, const mbfl_encoding *enc)
Definition mbstring.c:5994
MBSTRING_API bool php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding)
Definition mbstring.c:4872
#define MBSTRG(v)
Definition mbstring.h:118
mb_split(string $pattern, string $string, int $limit=-1)
mb_eregi_replace(string $pattern, string $replacement, string $string, ?string $options=null)
mb_ereg_search_getregs()
mb_ereg_search_setpos(int $offset)
mb_ereg_search_pos(?string $pattern=null, ?string $options=null)
mb_ereg_match(string $pattern, string $string, ?string $options=null)
mb_ereg_search(?string $pattern=null, ?string $options=null)
mb_regex_encoding(?string $encoding=null)
mb_ereg_replace(string $pattern, string $replacement, string $string, ?string $options=null)
mb_ereg(string $pattern, string $string, &$matches=null)
mb_ereg_replace_callback(string $pattern, callable $callback, string $string, ?string $options=null)
mb_ereg_search_regs(?string $pattern=null, ?string $options=null)
mb_ereg_search_getpos()
mb_regex_set_options(?string $options=null)
mb_eregi(string $pattern, string $string, &$matches=null)
mb_ereg_search_init(string $string, ?string $pattern=null, ?string $options=null)
php_info_print_table_start()
Definition info.c:1064
php_info_print_table_row(2, "PDO Driver for Firebird", "enabled")
php_info_print_table_end()
Definition info.c:1074
#define PHP_FUNCTION
Definition php.h:364
#define PHP_MSHUTDOWN_FUNCTION
Definition php.h:401
#define PHP_MINIT_FUNCTION
Definition php.h:400
#define PHP_MINFO_FUNCTION
Definition php.h:404
#define PHP_RINIT_FUNCTION
Definition php.h:402
#define PHP_RSHUTDOWN_FUNCTION
Definition php.h:403
unsigned const char * end
Definition php_ffi.h:51
unsigned const char * pos
Definition php_ffi.h:52
PHP_JSON_API size_t int options
Definition php_json.h:102
#define regex_t
xmlCharEncodingHandlerPtr encoding
Definition php_soap.h:170
p
Definition session.c:1105
uint32_t param_count
Definition zend_API.h:51
ZEND_API char * zend_make_compiled_string_description(const char *name)
Definition zend.c:1980
ZEND_API ZEND_COLD void zend_throw_error(zend_class_entry *exception_ce, const char *format,...)
Definition zend.c:1772
ZEND_API ZEND_COLD void zend_value_error(const char *format,...)
Definition zend.c:1849
#define INTERNAL_FUNCTION_PARAMETERS
Definition zend.h:49
#define INTERNAL_FUNCTION_PARAM_PASSTHRU
Definition zend.h:50
ZEND_API void add_assoc_bool_ex(zval *arg, const char *key, size_t key_len, bool b)
Definition zend_API.c:1946
ZEND_API zend_result add_next_index_stringl(zval *arg, const char *str, size_t length)
Definition zend_API.c:2195
ZEND_API zend_result add_next_index_long(zval *arg, zend_long n)
Definition zend_API.c:2132
ZEND_API void add_index_stringl(zval *arg, zend_ulong index, const char *str, size_t length)
Definition zend_API.c:2096
ZEND_API zend_result zend_parse_parameters(uint32_t num_args, const char *type_spec,...)
Definition zend_API.c:1300
ZEND_API void add_index_bool(zval *arg, zend_ulong index, bool b)
Definition zend_API.c:2051
ZEND_API void add_assoc_stringl_ex(zval *arg, const char *key, size_t key_len, const char *str, size_t length)
Definition zend_API.c:1991
ZEND_API ZEND_COLD void zend_argument_must_not_be_empty_error(uint32_t arg_num)
Definition zend_API.c:443
ZEND_API ZEND_COLD void zend_argument_value_error(uint32_t arg_num, const char *format,...)
Definition zend_API.c:433
#define ZEND_NUM_ARGS()
Definition zend_API.h:530
struct _zend_fcall_info_cache zend_fcall_info_cache
#define RETURN_STRING(s)
Definition zend_API.h:1043
#define RETURN_FALSE
Definition zend_API.h:1058
#define RETVAL_STRING(s)
Definition zend_API.h:1017
#define RETURN_NULL()
Definition zend_API.h:1036
#define zend_parse_parameters_none()
Definition zend_API.h:353
struct _zend_fcall_info zend_fcall_info
#define RETURN_THROWS()
Definition zend_API.h:1060
#define RETVAL_TRUE
Definition zend_API.h:1033
#define RETURN_STR(s)
Definition zend_API.h:1039
#define RETVAL_LONG(l)
Definition zend_API.h:1011
#define ZEND_EXTERN_MODULE_GLOBALS(module_name)
Definition zend_API.h:270
#define RETVAL_FALSE
Definition zend_API.h:1032
ZEND_API zend_result zend_call_function(zend_fcall_info *fci, zend_fcall_info_cache *fci_cache)
#define RETURN_TRUE
Definition zend_API.h:1059
#define array_init(arg)
Definition zend_API.h:537
#define efree(ptr)
Definition zend_alloc.h:155
#define pefree(ptr, persistent)
Definition zend_alloc.h:191
#define pemalloc(size, persistent)
Definition zend_alloc.h:189
struct _zval_struct zval
strlen(string $string)
zval * args
#define strcasecmp(s1, s2)
#define snprintf
#define E_WARNING
Definition zend_errors.h:24
#define EG(v)
ZEND_API void ZEND_FASTCALL zend_hash_destroy(HashTable *ht)
Definition zend_hash.c:1727
ZEND_API void ZEND_FASTCALL zend_array_destroy(HashTable *ht)
Definition zend_hash.c:1808
#define zend_hash_init(ht, nSize, pHashFunction, pDestructor, persistent)
Definition zend_hash.h:108
int32_t zend_long
Definition zend_long.h:42
struct _zend_string zend_string
#define convert_to_string(op)
#define ZEND_ASSERT(c)
#define ZEND_LONG_UINT_OVFL(zlong)
#define ZSTR_VAL(zstr)
Definition zend_string.h:68
#define ZSTR_LEN(zstr)
Definition zend_string.h:69
#define ZVAL_UNDEF(z)
#define IS_STRING
Definition zend_types.h:606
#define ZVAL_STR_COPY(z, s)
struct _zend_array HashTable
Definition zend_types.h:386
#define Z_ISUNDEF(zval)
Definition zend_types.h:956
#define Z_PTR_P(zval_p)
#define Z_STRVAL(zval)
Definition zend_types.h:974
@ FAILURE
Definition zend_types.h:61
#define Z_STRLEN(zval)
Definition zend_types.h:977
#define Z_TYPE(zval)
Definition zend_types.h:659
#define Z_ISNULL(zval)
Definition zend_types.h:959
#define Z_ARR_P(zval_p)
Definition zend_types.h:984
#define ZVAL_COPY_VALUE(z, v)
ZEND_API void zval_ptr_dtor(zval *zval_ptr)
zval retval
zval * return_value
zend_string * name
out($f, $s)