php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
collator_sort.c
Go to the documentation of this file.
1/*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | https://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
12 | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
13 +----------------------------------------------------------------------+
14 */
15
16#ifdef HAVE_CONFIG_H
17#include <config.h>
18#endif
19
20#include "php_intl.h"
21#include "collator.h"
22#include "collator_class.h"
23#include "collator_sort.h"
24#include "collator_convert.h"
25#include "intl_convert.h"
26
27#if !defined(HAVE_PTRDIFF_T) && !defined(_PTRDIFF_T_DEFINED)
29#endif
30
36 char* key; /* pointer to sort key */
37 zval* zstr; /* pointer to original string(hash-item) */
39
41
42static const size_t DEF_SORT_KEYS_BUF_SIZE = 1048576;
43static const size_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576;
44
45static const size_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576;
46static const size_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576;
47
48static const size_t DEF_UTF16_BUF_SIZE = 1024;
49
50/* {{{ collator_regular_compare_function */
51static int collator_regular_compare_function(zval *result, zval *op1, zval *op2)
52{
53 int rc = SUCCESS;
54 zval str1, str2;
55 zval num1, num2;
56 zval norm1, norm2;
57 zval *num1_p = NULL, *num2_p = NULL;
58 zval *norm1_p = NULL, *norm2_p = NULL;
59 zval *str1_p, *str2_p;
60
61 ZVAL_NULL(&str1);
62 str1_p = collator_convert_object_to_string( op1, &str1 );
63 ZVAL_NULL(&str2);
64 str2_p = collator_convert_object_to_string( op2, &str2 );
65
66 /* If both args are strings AND either of args is not numeric string
67 * then use ICU-compare. Otherwise PHP-compare. */
68 if( Z_TYPE_P(str1_p) == IS_STRING && Z_TYPE_P(str2_p) == IS_STRING &&
69 ( str1_p == ( num1_p = collator_convert_string_to_number_if_possible( str1_p, &num1 ) ) ||
70 str2_p == ( num2_p = collator_convert_string_to_number_if_possible( str2_p, &num2 ) ) ) )
71 {
72 /* Compare the strings using ICU. */
74 ZVAL_LONG(result, ucol_strcoll(
76 INTL_ZSTR_VAL(Z_STR_P(str1_p)), INTL_ZSTR_LEN(Z_STR_P(str1_p)),
77 INTL_ZSTR_VAL(Z_STR_P(str2_p)), INTL_ZSTR_LEN(Z_STR_P(str2_p)) ));
78 }
79 else
80 {
81 /* num1 is set if str1 and str2 are strings. */
82 if( num1_p )
83 {
84 if( num1_p == str1_p )
85 {
86 /* str1 is string but not numeric string
87 * just convert it to utf8.
88 */
89 norm1_p = collator_convert_zstr_utf16_to_utf8( str1_p, &norm1 );
90
91 /* num2 is not set but str2 is string => do normalization. */
92 norm2_p = collator_normalize_sort_argument( str2_p, &norm2 );
93 }
94 else
95 {
96 /* str1 is numeric strings => passthru to PHP-compare. */
97 Z_TRY_ADDREF_P(num1_p);
98 norm1_p = num1_p;
99
100 /* str2 is numeric strings => passthru to PHP-compare. */
101 Z_TRY_ADDREF_P(num2_p);
102 norm2_p = num2_p;
103 }
104 }
105 else
106 {
107 /* num1 is not set if str1 or str2 is not a string => do normalization. */
108 norm1_p = collator_normalize_sort_argument( str1_p, &norm1 );
109
110 /* if num1 is not set then num2 is not set as well => do normalization. */
111 norm2_p = collator_normalize_sort_argument( str2_p, &norm2 );
112 }
113
114 rc = compare_function( result, norm1_p, norm2_p );
115
116 zval_ptr_dtor( norm1_p );
117 zval_ptr_dtor( norm2_p );
118 }
119
120 if( num1_p )
121 zval_ptr_dtor( num1_p );
122
123 if( num2_p )
124 zval_ptr_dtor( num2_p );
125
126 zval_ptr_dtor( str1_p );
127 zval_ptr_dtor( str2_p );
128
129 return rc;
130}
131/* }}} */
132
133/* {{{ collator_numeric_compare_function
134 * Convert input args to double and compare it.
135 */
136static int collator_numeric_compare_function(zval *result, zval *op1, zval *op2)
137{
138 zval num1, num2;
139 zval *num1_p = NULL;
140 zval *num2_p = NULL;
141
142 if( Z_TYPE_P(op1) == IS_STRING )
143 {
144 num1_p = collator_convert_string_to_double( op1, &num1 );
145 op1 = num1_p;
146 }
147
148 if( Z_TYPE_P(op2) == IS_STRING )
149 {
150 num2_p = collator_convert_string_to_double( op2, &num2 );
151 op2 = num2_p;
152 }
153
155
156 if( num1_p )
157 zval_ptr_dtor( num1_p );
158 if( num2_p )
159 zval_ptr_dtor( num2_p );
160
161 return SUCCESS;
162}
163/* }}} */
164
165/* {{{ collator_icu_compare_function
166 * Direct use of ucol_strcoll.
167*/
168static int collator_icu_compare_function(zval *result, zval *op1, zval *op2)
169{
170 int rc = SUCCESS;
173
174 /* Compare the strings using ICU. */
176 ZVAL_LONG(result, ucol_strcoll(
178 INTL_ZSTR_VAL(str1), INTL_ZSTR_LEN(str1),
179 INTL_ZSTR_VAL(str2), INTL_ZSTR_LEN(str2) ));
180
181 zend_string_release(str1);
182 zend_string_release(str2);
183
184 return rc;
185}
186/* }}} */
187
188/* {{{ collator_compare_func
189 * Taken from PHP7 source (array_data_compare).
190 */
191static int collator_compare_func(Bucket *f, Bucket *s)
192{
193 zval result;
194 zval *first = &f->val;
195 zval *second = &s->val;
196
197 if( INTL_G(compare_func)( &result, first, second) == FAILURE )
198 return 0;
199
200 if( Z_TYPE(result) == IS_DOUBLE )
201 {
202 if( Z_DVAL(result) < 0 )
203 return -1;
204 else if( Z_DVAL(result) > 0 )
205 return 1;
206 else
207 return 0;
208 }
209
211
212 if( Z_LVAL(result) < 0 )
213 return -1;
214 else if( Z_LVAL(result) > 0 )
215 return 1;
216
217 return 0;
218}
219/* }}} */
220
221/* {{{ Compare sort keys */
222static int collator_cmp_sort_keys( const void *p1, const void *p2 )
223{
224 char* key1 = ((collator_sort_key_index_t*)p1)->key;
225 char* key2 = ((collator_sort_key_index_t*)p2)->key;
226
227 return strcmp( key1, key2 );
228}
229/* }}} */
230
231/* {{{ Choose compare function according to sort flags. */
232static collator_compare_func_t collator_get_compare_function( const zend_long sort_flags )
233{
235
236 switch( sort_flags )
237 {
239 func = collator_numeric_compare_function;
240 break;
241
243 func = collator_icu_compare_function;
244 break;
245
247 default:
248 func = collator_regular_compare_function;
249 break;
250 }
251
252 return func;
253}
254/* }}} */
255
256/* {{{ Common code shared by collator_sort() and collator_asort() API functions. */
257static void collator_sort_internal( int renumber, INTERNAL_FUNCTION_PARAMETERS )
258{
259 UCollator* saved_collator;
260 zval* array = NULL;
262 zend_long sort_flags = COLLATOR_SORT_REGULAR;
263
265
266 /* Parse parameters. */
268 &object, Collator_ce_ptr, &array, &sort_flags ) == FAILURE )
269 {
271 }
272
273 /* Fetch the object. */
275
276 if (!co->ucoll) {
277 zend_throw_error(NULL, "Object not initialized");
279 }
280
281 /* Set 'compare function' according to sort flags. */
282 INTL_G(compare_func) = collator_get_compare_function( sort_flags );
283
284 hash = Z_ARRVAL_P( array );
285
286 /* Convert strings in the specified array from UTF-8 to UTF-16. */
288 COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-8 to UTF-16" );
289
290 /* Save specified collator in the request-global (?) variable. */
291 saved_collator = INTL_G( current_collator );
292 INTL_G( current_collator ) = co->ucoll;
293
294 /* Sort specified array. */
295 zend_hash_sort(hash, collator_compare_func, renumber);
296
297 /* Restore saved collator. */
298 INTL_G( current_collator ) = saved_collator;
299
300 /* Convert strings in the specified array back to UTF-8. */
302 COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-16 to UTF-8" );
303
305}
306/* }}} */
307
308/* {{{ Sort array using specified collator. */
310{
311 collator_sort_internal( true, INTERNAL_FUNCTION_PARAM_PASSTHRU );
312}
313/* }}} */
314
315static void collator_sortkey_swap(collator_sort_key_index_t *p, collator_sort_key_index_t *q) /* {{{ */
316{
318 t = *p;
319 *p = *q;
320 *q = t;
321}
322/* }}} */
323
324/* {{{ Equivalent to standard PHP sort using Collator.
325 * Uses ICU ucol_getSortKey for performance.
326 */
328{
329 zval* array = NULL;
332 zval* hashData = NULL; /* currently processed item of input hash */
333
334 char* sortKeyBuf = NULL; /* buffer to store sort keys */
335 uint32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */
336 ptrdiff_t sortKeyBufOffset = 0; /* pos in buffer to store sort key */
337 uint32_t sortKeyLen = 0; /* the length of currently processing key */
338 uint32_t bufLeft = 0;
339 uint32_t bufIncrement = 0;
340
341 collator_sort_key_index_t* sortKeyIndxBuf = NULL; /* buffer to store 'indexes' which will be passed to 'qsort' */
342 uint32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE;
343 uint32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t );
344
345 uint32_t sortKeyCount = 0;
346 uint32_t j = 0;
347
348 UChar* utf16_buf = NULL; /* tmp buffer to hold current processing string in utf-16 */
349 int utf16_buf_size = DEF_UTF16_BUF_SIZE; /* the length of utf16_buf */
350 int utf16_len = 0; /* length of converted string */
351
353
354 /* Parse parameters. */
356 &object, Collator_ce_ptr, &array ) == FAILURE )
357 {
359 }
360
361 /* Fetch the object. */
363
364 if (!co || !co->ucoll) {
367 "Object not initialized", 0 );
368 zend_throw_error(NULL, "Object not initialized");
369
371 }
372
373 /*
374 * Sort specified array.
375 */
376 hash = Z_ARRVAL_P( array );
377
378 if( !hash || zend_hash_num_elements( hash ) == 0 )
380
381 /* Create bufers */
382 sortKeyBuf = ecalloc( sortKeyBufSize, sizeof( char ) );
383 sortKeyIndxBuf = ecalloc( sortKeyIndxBufSize, sizeof( uint8_t ) );
384 utf16_buf = eumalloc( utf16_buf_size );
385
386 /* Iterate through input hash and create a sort key for each value. */
387 ZEND_HASH_FOREACH_VAL(hash, hashData) {
388 /* Convert current hash item from UTF-8 to UTF-16LE and save the result to utf16_buf. */
389
390 utf16_len = utf16_buf_size;
391
392 /* Process string values only. */
393 if( Z_TYPE_P( hashData ) == IS_STRING )
394 {
395 intl_convert_utf8_to_utf16( &utf16_buf, &utf16_len, Z_STRVAL_P( hashData ), Z_STRLEN_P( hashData ), COLLATOR_ERROR_CODE_P( co ) );
396
397 if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
398 {
400 intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Sort with sort keys failed", 0 );
401
402 if( utf16_buf )
403 efree( utf16_buf );
404
405 efree( sortKeyIndxBuf );
406 efree( sortKeyBuf );
407
409 }
410 }
411 else
412 {
413 /* Set empty string */
414 utf16_len = 0;
415 utf16_buf[utf16_len] = 0;
416 }
417
418 if( (utf16_len + 1) > utf16_buf_size )
419 utf16_buf_size = utf16_len + 1;
420
421 /* Get sort key, reallocating the buffer if needed. */
422 bufLeft = sortKeyBufSize - sortKeyBufOffset;
423
424 sortKeyLen = ucol_getSortKey( co->ucoll,
425 utf16_buf,
426 utf16_len,
427 (uint8_t*)sortKeyBuf + sortKeyBufOffset,
428 bufLeft );
429
430 /* check for sortKeyBuf overflow, increasing its size of the buffer if needed */
431 if( sortKeyLen > bufLeft )
432 {
433 bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ? sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
434
435 sortKeyBufSize += bufIncrement;
436 bufLeft += bufIncrement;
437
438 sortKeyBuf = erealloc( sortKeyBuf, sortKeyBufSize );
439
440 sortKeyLen = ucol_getSortKey( co->ucoll, utf16_buf, utf16_len, (uint8_t*)sortKeyBuf + sortKeyBufOffset, bufLeft );
441 }
442
443 /* check sortKeyIndxBuf overflow, increasing its size of the buffer if needed */
444 if( ( sortKeyCount + 1 ) * sortKeyIndxSize > sortKeyIndxBufSize )
445 {
446 bufIncrement = ( sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT ) ? sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
447
448 sortKeyIndxBufSize += bufIncrement;
449
450 sortKeyIndxBuf = erealloc( sortKeyIndxBuf, sortKeyIndxBufSize );
451 }
452
453 sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset; /* remember just offset, cause address */
454 /* of 'sortKeyBuf' may be changed due to realloc. */
455 sortKeyIndxBuf[sortKeyCount].zstr = hashData;
456
457 sortKeyBufOffset += sortKeyLen;
458 ++sortKeyCount;
459
461
462 /* update ptrs to point to valid keys. */
463 for( j = 0; j < sortKeyCount; j++ )
464 sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;
465
466 /* sort it */
467 zend_sort( sortKeyIndxBuf, sortKeyCount,
468 sortKeyIndxSize, collator_cmp_sort_keys, (swap_func_t)collator_sortkey_swap);
469
470 ZVAL_COPY_VALUE(&garbage, array);
471 /* for resulting hash we'll assign new hash keys rather then reordering */
472 array_init(array);
473
474 for( j = 0; j < sortKeyCount; j++ )
475 {
476 Z_TRY_ADDREF_P( sortKeyIndxBuf[j].zstr );
477 zend_hash_next_index_insert( Z_ARRVAL_P(array), sortKeyIndxBuf[j].zstr);
478 }
479
480 if( utf16_buf )
481 efree( utf16_buf );
482
484 efree( sortKeyIndxBuf );
485 efree( sortKeyBuf );
486
488}
489/* }}} */
490
491/* {{{ Sort array using specified collator, maintaining index association. */
493{
494 collator_sort_internal( false, INTERNAL_FUNCTION_PARAM_PASSTHRU );
495}
496/* }}} */
497
498/* {{{ Get a sort key for a string from a Collator. */
500{
501 char* str = NULL;
502 size_t str_len = 0;
503 UChar* ustr = NULL;
504 int32_t ustr_len = 0;
505 int key_len = 0;
506 zend_string* key_str;
507
509
510 /* Parse parameters. */
512 &object, Collator_ce_ptr, &str, &str_len ) == FAILURE )
513 {
515 }
516
517 /* Fetch the object. */
519
520 if (!co || !co->ucoll) {
523 "Object not initialized", 0 );
524 zend_throw_error(NULL, "Object not initialized");
525
527 }
528
529 /*
530 * Compare given strings (converting them to UTF-16 first).
531 */
532
533 /* First convert the strings to UTF-16. */
535 &ustr, &ustr_len, str, str_len, COLLATOR_ERROR_CODE_P( co ) );
536 if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
537 {
538 /* Set global error code. */
540
541 /* Set error messages. */
543 "Error converting first argument to UTF-16", 0 );
544 efree( ustr );
546 }
547
548 /* ucol_getSortKey is exception in that the key length includes the
549 * NUL terminator*/
550 key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, NULL, 0);
551 if(!key_len) {
552 efree( ustr );
554 }
555 key_str = zend_string_alloc(key_len, 0);
556 key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, (uint8_t*)ZSTR_VAL(key_str), key_len);
557 efree( ustr );
558 if(!key_len) {
560 }
561 ZSTR_LEN(key_str) = key_len - 1;
562 RETVAL_NEW_STR(key_str);
563}
564/* }}} */
char s[4]
Definition cdf.c:77
#define COLLATOR_SORT_NUMERIC
Definition collator.h:23
#define COLLATOR_SORT_REGULAR
Definition collator.h:21
#define COLLATOR_SORT_STRING
Definition collator.h:22
zend_class_entry * Collator_ce_ptr
#define COLLATOR_ERROR_CODE_P(co)
#define COLLATOR_CHECK_STATUS(co, msg)
#define COLLATOR_ERROR_P(co)
#define COLLATOR_METHOD_FETCH_OBJECT
#define COLLATOR_ERROR_CODE(co)
#define COLLATOR_METHOD_INIT_VARS
zval * collator_normalize_sort_argument(zval *arg, zval *rv)
zend_string * collator_zval_to_string(zval *arg)
void collator_convert_hash_from_utf16_to_utf8(HashTable *hash, UErrorCode *status)
zval * collator_convert_string_to_number_if_possible(zval *str, zval *rv)
zval * collator_convert_string_to_double(zval *str, zval *rv)
void collator_convert_hash_from_utf8_to_utf16(HashTable *hash, UErrorCode *status)
zval * collator_convert_zstr_utf16_to_utf8(zval *utf16_zval, zval *rv)
zval * collator_convert_object_to_string(zval *obj, zval *rv)
zend_long ptrdiff_t
struct _collator_sort_key_index collator_sort_key_index_t
int(* collator_compare_func_t)(zval *result, zval *op1, zval *op2)
#define NULL
Definition gdcache.h:45
hash(string $algo, string $data, bool $binary=false, array $options=[])
Definition hash.stub.php:12
#define SUCCESS
Definition hash_sha3.c:261
again j
#define INTL_ZSTR_LEN(str)
Definition intl_common.h:42
#define INTL_ZSTR_VAL(str)
Definition intl_common.h:41
#define eumalloc(size)
Definition intl_common.h:31
void intl_convert_utf8_to_utf16(UChar **target, int32_t *target_len, const char *src, size_t src_len, UErrorCode *status)
void intl_errors_set_custom_msg(intl_error *err, const char *msg, int copyMsg)
Definition intl_error.c:187
void intl_error_set_code(intl_error *err, UErrorCode err_code)
Definition intl_error.c:141
#define PHP_FUNCTION
Definition php.h:364
#define INTL_G(v)
Definition php_intl.h:62
collator_compare_func_t compare_func
Definition php_intl.h:49
struct UCollator * current_collator
Definition php_intl.h:47
collator_sort_with_sort_keys(Collator $object, array &$array)
collator_asort(Collator $object, array &$array, int $flags=Collator::SORT_REGULAR)
collator_get_sort_key(Collator $object, string $string)
collator_sort(Collator $object, array &$array, int $flags=Collator::SORT_REGULAR)
unsigned char key[REFLECTION_KEY_LEN]
p
Definition session.c:1105
zval val
Definition zend_types.h:381
ZEND_API ZEND_COLD void zend_throw_error(zend_class_entry *exception_ce, const char *format,...)
Definition zend.c:1772
#define INTERNAL_FUNCTION_PARAMETERS
Definition zend.h:49
#define INTERNAL_FUNCTION_PARAM_PASSTHRU
Definition zend.h:50
ZEND_API zend_result zend_parse_method_parameters(uint32_t num_args, zval *this_ptr, const char *type_spec,...)
Definition zend_API.c:1314
#define ZEND_NUM_ARGS()
Definition zend_API.h:530
#define RETURN_FALSE
Definition zend_API.h:1058
#define RETVAL_NEW_STR(s)
Definition zend_API.h:1015
#define RETURN_THROWS()
Definition zend_API.h:1060
#define ZEND_EXTERN_MODULE_GLOBALS(module_name)
Definition zend_API.h:270
#define getThis()
Definition zend_API.h:526
#define RETURN_TRUE
Definition zend_API.h:1059
#define array_init(arg)
Definition zend_API.h:537
#define ecalloc(nmemb, size)
Definition zend_alloc.h:158
#define efree(ptr)
Definition zend_alloc.h:155
#define erealloc(ptr, size)
Definition zend_alloc.h:159
struct _zval_struct zval
strcmp(string $string1, string $string2)
execute_data func
ZEND_API zval *ZEND_FASTCALL zend_hash_next_index_insert(HashTable *ht, zval *pData)
Definition zend_hash.c:1224
#define ZEND_HASH_FOREACH_END()
Definition zend_hash.h:1086
#define ZEND_HASH_FOREACH_VAL(ht, _val)
Definition zend_hash.h:1102
int32_t zend_long
Definition zend_long.h:42
struct _zend_string zend_string
ZEND_API int ZEND_FASTCALL numeric_compare_function(zval *op1, zval *op2)
ZEND_API zend_result ZEND_FASTCALL compare_function(zval *result, zval *op1, zval *op2)
ZEND_API void ZEND_FASTCALL convert_to_long(zval *op)
#define ZEND_ASSERT(c)
ZEND_API void zend_sort(void *base, size_t nmemb, size_t siz, compare_func_t cmp, swap_func_t swp)
Definition zend_sort.c:248
#define ZSTR_VAL(zstr)
Definition zend_string.h:68
#define ZSTR_LEN(zstr)
Definition zend_string.h:69
#define Z_TYPE_P(zval_p)
Definition zend_types.h:660
#define Z_TRY_ADDREF_P(pz)
#define Z_DVAL(zval)
Definition zend_types.h:968
#define Z_STRVAL_P(zval_p)
Definition zend_types.h:975
#define Z_ARRVAL_P(zval_p)
Definition zend_types.h:987
#define ZVAL_NULL(z)
#define ZVAL_LONG(z, l)
#define IS_STRING
Definition zend_types.h:606
struct _zend_array HashTable
Definition zend_types.h:386
#define IS_DOUBLE
Definition zend_types.h:605
#define Z_STR_P(zval_p)
Definition zend_types.h:972
#define Z_STRLEN_P(zval_p)
Definition zend_types.h:978
@ FAILURE
Definition zend_types.h:61
struct _Bucket Bucket
void(* swap_func_t)(void *, void *)
Definition zend_types.h:105
#define Z_TYPE(zval)
Definition zend_types.h:659
#define ZVAL_COPY_VALUE(z, v)
#define Z_LVAL(zval)
Definition zend_types.h:965
ZEND_API void zval_ptr_dtor(zval *zval_ptr)
bool result
op2
op1
zend_refcounted * garbage