php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
grapheme_string.c
Go to the documentation of this file.
1/*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | https://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Author: Ed Batutis <ed@batutis.com> |
12 +----------------------------------------------------------------------+
13 */
14
15/* {{{ includes */
16#ifdef HAVE_CONFIG_H
17#include <config.h>
18#endif
19
20#include <php.h>
21#include "grapheme.h"
22#include "grapheme_util.h"
23
24#include <unicode/utypes.h>
25#include <unicode/utf8.h>
26#include <unicode/ucol.h>
27#include <unicode/ustring.h>
28#include <unicode/ubrk.h>
29
30/* }}} */
31
32/* {{{ Get number of graphemes in a string */
34{
35 char* string;
36 size_t string_len;
37 UChar* ustring = NULL;
38 int ustring_len = 0;
39 zend_long ret_len;
40 UErrorCode status;
41
43 Z_PARAM_STRING(string, string_len)
45
46 ret_len = grapheme_ascii_check((unsigned char *)string, string_len);
47
48 if ( ret_len >= 0 )
49 RETURN_LONG(string_len);
50
51 /* convert the string to UTF-16. */
53 intl_convert_utf8_to_utf16(&ustring, &ustring_len, string, string_len, &status );
54
55 if ( U_FAILURE( status ) ) {
56 /* Set global error code. */
58
59 /* Set error messages. */
60 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
61 if (ustring) {
62 efree( ustring );
63 }
65 }
66
67 ret_len = grapheme_split_string(ustring, ustring_len, NULL, 0 );
68
69 if (ustring) {
70 efree( ustring );
71 }
72
73 if (ret_len >= 0) {
74 RETVAL_LONG(ret_len);
75 } else {
77 }
78}
79/* }}} */
80
81/* {{{ Find position of first occurrence of a string within another */
83{
84 char *haystack, *needle;
85 size_t haystack_len, needle_len;
86 const char *found;
87 zend_long loffset = 0;
88 int32_t offset = 0;
89 size_t noffset = 0;
90 zend_long ret_pos;
91
93 Z_PARAM_STRING(haystack, haystack_len)
94 Z_PARAM_STRING(needle, needle_len)
96 Z_PARAM_LONG(loffset)
98
99 if ( OUTSIDE_STRING(loffset, haystack_len) ) {
100 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
102 }
103
104 /* we checked that it will fit: */
105 offset = (int32_t) loffset;
106 noffset = offset >= 0 ? offset : (int32_t)haystack_len + offset;
107
108 /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
109
110 if (offset >= 0 && grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0) {
111 /* quick check to see if the string might be there
112 * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
113 */
114 found = php_memnstr(haystack + noffset, needle, needle_len, haystack + haystack_len);
115
116 /* if it isn't there then we are done */
117 if (found) {
118 RETURN_LONG(found - haystack);
119 }
121 }
122
123 /* do utf16 part of the strpos */
124 ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0 /* last */ );
125
126 if ( ret_pos >= 0 ) {
127 RETURN_LONG(ret_pos);
128 } else {
130 }
131}
132/* }}} */
133
134/* {{{ Find position of first occurrence of a string within another, ignoring case differences */
136{
137 char *haystack, *needle;
138 size_t haystack_len, needle_len;
139 const char *found;
140 zend_long loffset = 0;
141 int32_t offset = 0;
142 zend_long ret_pos;
143 int is_ascii;
144
146 Z_PARAM_STRING(haystack, haystack_len)
147 Z_PARAM_STRING(needle, needle_len)
149 Z_PARAM_LONG(loffset)
151
152 if ( OUTSIDE_STRING(loffset, haystack_len) ) {
153 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
155 }
156
157 /* we checked that it will fit: */
158 offset = (int32_t) loffset;
159
160 /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
161
162 is_ascii = ( grapheme_ascii_check((unsigned char*)haystack, haystack_len) >= 0 );
163
164 if ( is_ascii ) {
165 char *haystack_dup, *needle_dup;
166 int32_t noffset = offset >= 0 ? offset : (int32_t)haystack_len + offset;
167 needle_dup = estrndup(needle, needle_len);
168 zend_str_tolower(needle_dup, needle_len);
169 haystack_dup = estrndup(haystack, haystack_len);
170 zend_str_tolower(haystack_dup, haystack_len);
171
172 found = php_memnstr(haystack_dup + noffset, needle_dup, needle_len, haystack_dup + haystack_len);
173
174 efree(haystack_dup);
175 efree(needle_dup);
176
177 if (found) {
178 RETURN_LONG(found - haystack_dup);
179 }
180
181 /* if needle was ascii too, we are all done, otherwise we need to try using Unicode to see what we get */
182 if ( grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
184 }
185 }
186
187 /* do utf16 part of the strpos */
188 ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0 /*last */ );
189
190 if ( ret_pos >= 0 ) {
191 RETURN_LONG(ret_pos);
192 } else {
194 }
195
196}
197/* }}} */
198
199/* {{{ Find position of last occurrence of a string within another */
201{
202 char *haystack, *needle;
203 size_t haystack_len, needle_len;
204 zend_long loffset = 0;
205 int32_t offset = 0;
206 zend_long ret_pos;
207 int is_ascii;
208
210 Z_PARAM_STRING(haystack, haystack_len)
211 Z_PARAM_STRING(needle, needle_len)
213 Z_PARAM_LONG(loffset)
215
216 if ( OUTSIDE_STRING(loffset, haystack_len) ) {
217 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
219 }
220
221 /* we checked that it will fit: */
222 offset = (int32_t) loffset;
223
224 /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
225
226 is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0;
227
228 if ( is_ascii ) {
229
230 ret_pos = grapheme_strrpos_ascii(haystack, haystack_len, needle, needle_len, offset);
231
232 if ( ret_pos >= 0 ) {
233 RETURN_LONG(ret_pos);
234 }
235
236 /* if the needle was ascii too, we are done */
237
238 if ( grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
240 }
241
242 /* else we need to continue via utf16 */
243 }
244
245 ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1/* last */);
246
247 if ( ret_pos >= 0 ) {
248 RETURN_LONG(ret_pos);
249 } else {
251 }
252
253
254}
255/* }}} */
256
257/* {{{ Find position of last occurrence of a string within another, ignoring case */
259{
260 char *haystack, *needle;
261 size_t haystack_len, needle_len;
262 zend_long loffset = 0;
263 int32_t offset = 0;
264 zend_long ret_pos;
265 int is_ascii;
266
268 Z_PARAM_STRING(haystack, haystack_len)
269 Z_PARAM_STRING(needle, needle_len)
271 Z_PARAM_LONG(loffset)
273
274 if ( OUTSIDE_STRING(loffset, haystack_len) ) {
275 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
277 }
278
279 /* we checked that it will fit: */
280 offset = (int32_t) loffset;
281
282 /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
283
284 is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0;
285
286 if ( is_ascii ) {
287 char *needle_dup, *haystack_dup;
288
289 needle_dup = estrndup(needle, needle_len);
290 zend_str_tolower(needle_dup, needle_len);
291 haystack_dup = estrndup(haystack, haystack_len);
292 zend_str_tolower(haystack_dup, haystack_len);
293
294 ret_pos = grapheme_strrpos_ascii(haystack_dup, haystack_len, needle_dup, needle_len, offset);
295
296 efree(haystack_dup);
297 efree(needle_dup);
298
299 if ( ret_pos >= 0 ) {
300 RETURN_LONG(ret_pos);
301 }
302
303 /* if the needle was ascii too, we are done */
304
305 if ( grapheme_ascii_check((unsigned char *)needle, needle_len) >= 0 ) {
307 }
308
309 /* else we need to continue via utf16 */
310 }
311
312 ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, 1 /*last */);
313
314 if ( ret_pos >= 0 ) {
315 RETURN_LONG(ret_pos);
316 } else {
318 }
319
320
321}
322/* }}} */
323
324/* {{{ Returns part of a string */
326{
327 char *str;
328 zend_string *u8_sub_str;
329 UChar *ustr;
330 size_t str_len;
331 int32_t ustr_len;
332 zend_long lstart = 0, length = 0;
333 int32_t start = 0;
334 int iter_val;
335 UErrorCode status;
336 unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
337 UBreakIterator* bi = NULL;
338 int sub_str_start_pos, sub_str_end_pos;
339 int32_t (*iter_func)(UBreakIterator *);
340 bool no_length = true;
341
343 Z_PARAM_STRING(str, str_len)
344 Z_PARAM_LONG(lstart)
346 Z_PARAM_LONG_OR_NULL(length, no_length)
348
349 if (lstart < INT32_MIN || lstart > INT32_MAX) {
350 zend_argument_value_error(2, "is too large");
352 }
353
354 start = (int32_t) lstart;
355
356 if (no_length) {
357 length = str_len;
358 }
359
360 if (length < INT32_MIN || length > INT32_MAX) {
361 zend_argument_value_error(3, "is too large");
363 }
364
365 /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
366
367 if ( grapheme_ascii_check((unsigned char *)str, str_len) >= 0 ) {
368 int32_t asub_str_len;
369 char *sub_str;
370 grapheme_substr_ascii(str, str_len, start, (int32_t)length, &sub_str, &asub_str_len);
371
372 if ( NULL == sub_str ) {
373 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: invalid parameters", 1 );
375 }
376
377 RETURN_STRINGL(sub_str, asub_str_len);
378 }
379
380 ustr = NULL;
381 ustr_len = 0;
383 intl_convert_utf8_to_utf16(&ustr, &ustr_len, str, str_len, &status);
384
385 if ( U_FAILURE( status ) ) {
386 /* Set global error code. */
388
389 /* Set error messages. */
390 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
391 if (ustr) {
392 efree( ustr );
393 }
395 }
396
397 bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status );
398
399 if( U_FAILURE(status) ) {
401 }
402
403 ubrk_setText(bi, ustr, ustr_len, &status);
404
405 if ( start < 0 ) {
406 iter_func = ubrk_previous;
407 ubrk_last(bi);
408 iter_val = 1;
409 }
410 else {
411 iter_func = ubrk_next;
412 iter_val = -1;
413 }
414
415 sub_str_start_pos = 0;
416
417 while ( start ) {
418 sub_str_start_pos = iter_func(bi);
419
420 if ( UBRK_DONE == sub_str_start_pos ) {
421 break;
422 }
423
424 start += iter_val;
425 }
426
427 if (0 != start) {
428 if (start > 0) {
429 if (ustr) {
430 efree(ustr);
431 }
432 ubrk_close(bi);
434 }
435
436 sub_str_start_pos = 0;
437 ubrk_first(bi);
438 }
439
440 /* OK to convert here since if str_len were big, convert above would fail */
441 if (length >= (int32_t)str_len) {
442
443 /* no length supplied or length is too big, return the rest of the string */
444
446 u8_sub_str = intl_convert_utf16_to_utf8(ustr + sub_str_start_pos, ustr_len - sub_str_start_pos, &status);
447
448 if (ustr) {
449 efree( ustr );
450 }
451 ubrk_close( bi );
452
453 if ( !u8_sub_str ) {
454 /* Set global error code. */
456
457 /* Set error messages. */
458 intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
459
461 }
462
463 /* return the allocated string, not a duplicate */
464 RETVAL_NEW_STR(u8_sub_str);
465 return;
466 }
467
468 if(length == 0) {
469 /* empty length - we've validated start, we can return "" now */
470 if (ustr) {
471 efree(ustr);
472 }
473 ubrk_close(bi);
475 }
476
477 /* find the end point of the string to return */
478
479 if ( length < 0 ) {
480 iter_func = ubrk_previous;
481 ubrk_last(bi);
482 iter_val = 1;
483 }
484 else {
485 iter_func = ubrk_next;
486 iter_val = -1;
487 }
488
489 sub_str_end_pos = 0;
490
491 while ( length ) {
492 sub_str_end_pos = iter_func(bi);
493
494 if ( UBRK_DONE == sub_str_end_pos ) {
495 break;
496 }
497
498 length += iter_val;
499 }
500
501 ubrk_close(bi);
502
503 if ( UBRK_DONE == sub_str_end_pos) {
504 if (length < 0) {
505 efree(ustr);
507 } else {
508 sub_str_end_pos = ustr_len;
509 }
510 }
511
512 if (sub_str_start_pos > sub_str_end_pos) {
513 efree(ustr);
515 }
516
518 u8_sub_str = intl_convert_utf16_to_utf8(ustr + sub_str_start_pos, ( sub_str_end_pos - sub_str_start_pos ), &status);
519
520 efree( ustr );
521
522 if ( !u8_sub_str ) {
523 /* Set global error code. */
525
526 /* Set error messages. */
527 intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 );
528
530 }
531
532 /* return the allocated string, not a duplicate */
533 RETVAL_NEW_STR(u8_sub_str);
534}
535/* }}} */
536
537/* {{{ strstr_common_handler */
/*
 * Shared implementation used by the two wrappers that follow it in this file,
 * which call it with f_ignore_case = 0 and f_ignore_case = 1 respectively.
 *
 * Reads a haystack string, a needle string and a bool `part`. On a match it
 * returns the portion of haystack before the match when `part` is true, or
 * the portion from the match to the end of haystack when `part` is false.
 *
 * NOTE(review): this listing skips source lines 546, 549, 551, 560 and 580,
 * so the argument-parsing wrapper and the bodies of the two "not found"
 * branches are not visible here - confirm against the real file.
 */
538static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case)
539{
540 char *haystack, *needle;
541 const char *found;
542 size_t haystack_len, needle_len;
543 int32_t ret_pos, uchar_pos;
544 bool part = false;
545
547 Z_PARAM_STRING(haystack, haystack_len)
548 Z_PARAM_STRING(needle, needle_len)
550 Z_PARAM_BOOL(part)
552
/* The byte-wise fast path is only attempted for the case-sensitive variant. */
553 if ( !f_ignore_case ) {
554
555 /* ASCII optimization: quick check to see if the string might be there */
556 found = php_memnstr(haystack, needle, needle_len, haystack + haystack_len);
557
558 /* if it isn't there then we are done */
559 if ( !found ) {
561 }
562
563 /* if it is there, and if the haystack is ascii, we are all done */
/* A byte match in a non-ASCII haystack is not trusted: execution falls
 * through to the UTF-16 search below instead of returning here. */
564 if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
565 size_t found_offset = found - haystack;
566
567 if (part) {
568 RETURN_STRINGL(haystack, found_offset);
569 } else {
570 RETURN_STRINGL(found, haystack_len - found_offset);
571 }
572 }
573
574 }
575
576 /* need to work in utf16 */
/* Search from offset 0, first occurrence (last = 0); uchar_pos receives the
 * position that is converted to a byte offset below. */
577 ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ );
578
579 if ( ret_pos < 0 ) {
581 }
582
583 /* uchar_pos is the 'nth' Unicode character position of the needle */
584
/* ret_pos is reused as a byte index: start at 0 and let U8_FWD_N step it
 * forward through haystack by uchar_pos characters. */
585 ret_pos = 0;
586 U8_FWD_N(haystack, ret_pos, haystack_len, uchar_pos);
587
588 if (part) {
589 RETURN_STRINGL(haystack, ret_pos);
590 } else {
591 RETURN_STRINGL(haystack + ret_pos, haystack_len - ret_pos);
592 }
593
594}
595/* }}} */
596
597/* {{{ Finds first occurrence of a string within another */
599{
600 strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0 /* f_ignore_case */);
601}
602/* }}} */
603
604/* {{{ Finds first occurrence of a string within another, ignoring case differences */
606{
607 strstr_common_handler(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1 /* f_ignore_case */);
608}
609/* }}} */
610
611/* {{{ grapheme_extract_charcount_iter - grapheme iterator for grapheme_extract MAXCHARS */
612static inline int32_t
613grapheme_extract_charcount_iter(UBreakIterator *bi, int32_t csize, unsigned char *pstr, int32_t str_len)
614{
615 int pos = 0;
616 int ret_pos = 0;
617 int break_pos, prev_break_pos;
618 int count = 0;
619
620 while ( 1 ) {
621 pos = ubrk_next(bi);
622
623 if ( UBRK_DONE == pos ) {
624 break;
625 }
626
627 for ( break_pos = ret_pos; break_pos < pos; ) {
628 count++;
629 prev_break_pos = break_pos;
630 U8_FWD_1(pstr, break_pos, str_len);
631
632 if ( prev_break_pos == break_pos ) {
633 /* something wrong - malformed utf8? */
634 csize = 0;
635 break;
636 }
637 }
638
639 /* if we are beyond our limit, then the loop is done */
640 if ( count > csize ) {
641 break;
642 }
643
644 ret_pos = break_pos;
645 }
646
647 return ret_pos;
648}
649/* }}} */
650
651/* {{{ grapheme_extract_bytecount_iter - grapheme iterator for grapheme_extract MAXBYTES */
652static inline int32_t
653grapheme_extract_bytecount_iter(UBreakIterator *bi, int32_t bsize, unsigned char *pstr, int32_t str_len)
654{
655 int pos = 0;
656 int ret_pos = 0;
657
658 while ( 1 ) {
659 pos = ubrk_next(bi);
660
661 if ( UBRK_DONE == pos ) {
662 break;
663 }
664
665 if ( pos > bsize ) {
666 break;
667 }
668
669 ret_pos = pos;
670 }
671
672 return ret_pos;
673}
674/* }}} */
675
676/* {{{ grapheme_extract_count_iter - grapheme iterator for grapheme_extract COUNT */
677static inline int32_t
678grapheme_extract_count_iter(UBreakIterator *bi, int32_t size, unsigned char *pstr, int32_t str_len)
679{
680 int next_pos = 0;
681 int ret_pos = 0;
682
683 while ( size ) {
684 next_pos = ubrk_next(bi);
685
686 if ( UBRK_DONE == next_pos ) {
687 break;
688 }
689 ret_pos = next_pos;
690 size--;
691 }
692
693 return ret_pos;
694}
695/* }}} */
696
697/* {{{ grapheme extract iter function pointer array */
698typedef int32_t (*grapheme_extract_iter)(UBreakIterator * /*bi*/, int32_t /*size*/, unsigned char * /*pstr*/, int32_t /*str_len*/);
699
700static const grapheme_extract_iter grapheme_extract_iters[] = {
701 &grapheme_extract_count_iter,
702 &grapheme_extract_bytecount_iter,
703 &grapheme_extract_charcount_iter,
704};
705/* }}} */
706
707/* {{{ Function to extract a sequence of default grapheme clusters */
709{
710 char *str, *pstr;
711 UText ut = UTEXT_INITIALIZER;
712 size_t str_len;
713 zend_long size; /* maximum number of grapheme clusters, bytes, or characters (based on extract_type) to return */
714 zend_long lstart = 0; /* starting position in str in bytes */
715 int32_t start = 0;
717 UErrorCode status;
718 unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
719 UBreakIterator* bi = NULL;
720 int ret_pos;
721 zval *next = NULL; /* return offset of next part of the string */
722
724 Z_PARAM_STRING(str, str_len)
727 Z_PARAM_LONG(extract_type)
728 Z_PARAM_LONG(lstart)
731
732 if (lstart < 0) {
733 lstart += str_len;
734 }
735
736 if ( NULL != next ) {
739 if (UNEXPECTED(EG(exception))) {
741 }
742 }
743
744 if ( extract_type < GRAPHEME_EXTRACT_TYPE_MIN || extract_type > GRAPHEME_EXTRACT_TYPE_MAX ) {
745 zend_argument_value_error(3, "must be one of GRAPHEME_EXTR_COUNT, GRAPHEME_EXTR_MAXBYTES, or GRAPHEME_EXTR_MAXCHARS");
747 }
748
749 if ( lstart > INT32_MAX || lstart < 0 || (size_t)lstart >= str_len ) {
750 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_extract: start not contained in string", 0 );
752 }
753
754 if (size < 0) {
755 zend_argument_value_error(2, "must be greater than or equal to 0");
757 }
758
759 if (size > INT32_MAX) {
760 zend_argument_value_error(2, "is too large");
762 }
763
764 if (size == 0) {
766 }
767
768 /* we checked that it will fit: */
769 start = (int32_t) lstart;
770
771 pstr = str + start;
772
773 /* just in case pstr points in the middle of a character, move forward to the start of the next char */
774 if ( !U8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
775 char *str_end = str + str_len;
776
777 while ( !U8_IS_SINGLE(*pstr) && !U8_IS_LEAD(*pstr) ) {
778 pstr++;
779 if ( pstr >= str_end ) {
781 "grapheme_extract: invalid input string", 0 );
782
784 }
785 }
786 }
787
788 str_len -= (pstr - str);
789
790 /* if the string is all ASCII up to size+1 - or str_len whichever is first - then we are done.
791 (size + 1 because the size-th character might be the beginning of a grapheme cluster)
792 */
793
794 if ( -1 != grapheme_ascii_check((unsigned char *)pstr, MIN(size + 1, str_len)) ) {
795 size_t nsize = MIN(size, str_len);
796 if ( NULL != next ) {
798 }
799 RETURN_STRINGL(pstr, nsize);
800 }
801
803 utext_openUTF8(&ut, pstr, str_len, &status);
804
805 if ( U_FAILURE( status ) ) {
806 /* Set global error code. */
808
809 /* Set error messages. */
810 intl_error_set_custom_msg( NULL, "Error opening UTF-8 text", 0 );
811
813 }
814
815 bi = NULL;
817 bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status );
818
819 ubrk_setUText(bi, &ut, &status);
820 /* if the caller put us in the middle of a grapheme, we can't detect it in all cases since we
821 can't back up. So, we will not do anything. */
822
823 /* now we need to find the end of the chunk the user wants us to return */
824 /* it's ok to convert str_len to int32_t since if it were too big intl_convert_utf8_to_utf16 above would fail */
825 ret_pos = (*grapheme_extract_iters[extract_type])(bi, size, (unsigned char *)pstr, (int32_t)str_len);
826
827 utext_close(&ut);
828 ubrk_close(bi);
829
830 if ( NULL != next ) {
832 }
833
834 RETURN_STRINGL(((char *)pstr), ret_pos);
835}
836
838{
839 char *pstr, *end;
840 zend_string *str;
841 zend_long split_len = 1;
842
843 unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
844 UErrorCode ustatus = U_ZERO_ERROR;
845 int32_t pos, current, i, end_len = 0;
846 UBreakIterator* bi;
847 UText *ut = NULL;
848
850 Z_PARAM_STR(str)
852 Z_PARAM_LONG(split_len)
854
855 if (split_len <= 0 || split_len > UINT_MAX / 4) {
856 zend_argument_value_error(2, "must be greater than 0 and less than or equal to %d", UINT_MAX / 4);
858 }
859
860 if (ZSTR_LEN(str) == 0) {
862 }
863
864 pstr = ZSTR_VAL(str);
865 ut = utext_openUTF8(ut, pstr, ZSTR_LEN(str), &ustatus);
866
867 if ( U_FAILURE( ustatus ) ) {
868 /* Set global error code. */
869 intl_error_set_code( NULL, ustatus );
870
871 /* Set error messages. */
872 intl_error_set_custom_msg( NULL, "Error opening UTF-8 text", 0 );
873
875 }
876
877 bi = NULL;
878 ustatus = U_ZERO_ERROR;
879 bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &ustatus );
880
881 if( U_FAILURE(ustatus) ) {
883 }
884
885 ubrk_setUText(bi, ut, &ustatus);
886
887 pos = 0;
889
890 for (end = pstr, i = 0, current = 0; pos != UBRK_DONE;) {
891 end_len = pos - current;
892 pos = ubrk_next(bi);
893
894 if (i == split_len - 1) {
895 if ( pos != UBRK_DONE ) {
897 end = pstr + pos - current;
898 i = 0;
899 }
900 pstr += pos - current;
901 current = pos;
902 } else {
903 i += 1;
904 }
905 }
906
907 if (i != 0 && end_len != 0) {
909 }
910
911 utext_close(ut);
912 ubrk_close(bi);
913}
914
915/* }}} */
bool exception
Definition assert.c:30
count(Countable|array $value, int $mode=COUNT_NORMAL)
const U_ILLEGAL_ARGUMENT_ERROR
const U_ZERO_ERROR
DNS_STATUS status
Definition dns_win32.c:49
new_type size
Definition ffi.c:4365
buf start
Definition ffi.c:4687
zend_long offset
#define NULL
Definition gdcache.h:45
#define GRAPHEME_EXTRACT_TYPE_MAX
Definition grapheme.h:27
#define GRAPHEME_EXTRACT_TYPE_COUNT
Definition grapheme.h:23
int32_t(* grapheme_extract_iter)(UBreakIterator *, int32_t, unsigned char *, int32_t)
zend_long grapheme_strrpos_ascii(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset)
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last)
void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char **sub_str, int32_t *sub_str_len)
int32_t grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len)
UBreakIterator * grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status)
zend_long grapheme_ascii_check(const unsigned char *day, size_t len)
#define OUTSIDE_STRING(offset, max_len)
foreach($dp as $el) foreach( $dp as $el) if( $pass2< 2) echo ""
void intl_convert_utf8_to_utf16(UChar **target, int32_t *target_len, const char *src, size_t src_len, UErrorCode *status)
zend_string * intl_convert_utf16_to_utf8(const UChar *src, int32_t src_len, UErrorCode *status)
void intl_error_set(intl_error *err, UErrorCode code, const char *msg, int copyMsg)
Definition intl_error.c:161
void intl_error_set_code(intl_error *err, UErrorCode err_code)
Definition intl_error.c:141
void intl_error_set_custom_msg(intl_error *err, const char *msg, int copyMsg)
Definition intl_error.c:90
#define next(ls)
Definition minilua.c:2661
#define PHP_FUNCTION
Definition php.h:364
#define php_memnstr
Definition php.h:343
unsigned const char * end
Definition php_ffi.h:51
unsigned const char * pos
Definition php_ffi.h:52
grapheme_substr(string $string, int $offset, ?int $length=null)
grapheme_extract(string $haystack, int $size, int $type=GRAPHEME_EXTR_COUNT, int $offset=0, &$next=null)
grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle=false)
grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle=false)
grapheme_strlen(string $string)
grapheme_strripos(string $haystack, string $needle, int $offset=0)
grapheme_stripos(string $haystack, string $needle, int $offset=0)
grapheme_strpos(string $haystack, string $needle, int $offset=0)
grapheme_str_split(string $string, int $length=1)
grapheme_strrpos(string $haystack, string $needle, int $offset=0)
zval * current
Definition session.c:1024
#define INTERNAL_FUNCTION_PARAMETERS
Definition zend.h:49
#define INTERNAL_FUNCTION_PARAM_PASSTHRU
Definition zend.h:50
ZEND_API zend_result add_next_index_stringl(zval *arg, const char *str, size_t length)
Definition zend_API.c:2195
ZEND_API ZEND_COLD void zend_argument_value_error(uint32_t arg_num, const char *format,...)
Definition zend_API.c:433
#define RETURN_STRINGL(s, l)
Definition zend_API.h:1044
#define ZEND_PARSE_PARAMETERS_END()
Definition zend_API.h:1641
#define RETURN_FALSE
Definition zend_API.h:1058
#define RETURN_NULL()
Definition zend_API.h:1036
#define RETVAL_NEW_STR(s)
Definition zend_API.h:1015
#define Z_PARAM_OPTIONAL
Definition zend_API.h:1667
#define Z_PARAM_STRING(dest, dest_len)
Definition zend_API.h:2071
#define Z_PARAM_STR(dest)
Definition zend_API.h:2086
#define ZEND_PARSE_PARAMETERS_START(min_num_args, max_num_args)
Definition zend_API.h:1620
#define ZEND_TRY_ASSIGN_REF_LONG(zv, lval)
Definition zend_API.h:1205
#define Z_PARAM_LONG(dest)
Definition zend_API.h:1896
#define RETURN_LONG(l)
Definition zend_API.h:1037
#define RETURN_THROWS()
Definition zend_API.h:1060
#define Z_PARAM_LONG_OR_NULL(dest, is_null)
Definition zend_API.h:1899
#define RETVAL_LONG(l)
Definition zend_API.h:1011
#define Z_PARAM_BOOL(dest)
Definition zend_API.h:1726
#define RETURN_EMPTY_ARRAY()
Definition zend_API.h:1051
#define RETURN_EMPTY_STRING()
Definition zend_API.h:1047
#define Z_PARAM_ZVAL(dest)
Definition zend_API.h:2100
#define RETVAL_FALSE
Definition zend_API.h:1032
#define array_init(arg)
Definition zend_API.h:537
#define estrndup(s, length)
Definition zend_alloc.h:165
#define efree(ptr)
Definition zend_alloc.h:155
struct _zval_struct zval
#define EG(v)
int32_t zend_long
Definition zend_long.h:42
struct _zend_string zend_string
ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length)
#define MIN(a, b)
#define ZEND_ASSERT(c)
#define UNEXPECTED(condition)
#define ZSTR_VAL(zstr)
Definition zend_string.h:68
#define ZSTR_LEN(zstr)
Definition zend_string.h:69
#define Z_ISREF_P(zval_p)
Definition zend_types.h:954
zval * return_value