php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
uchar.c
Go to the documentation of this file.
1#include "uchar.h"
2#include "intl_data.h"
3#include "intl_convert.h"
4
5#include <unicode/uchar.h>
6#include <unicode/utf8.h>
7
8#include "uchar_arginfo.h"
9
/* Shorthand: declares method `mname` of the IntlChar class via PHP_METHOD. */
#define IC_METHOD(mname) PHP_METHOD(IntlChar, mname)
11
12static inline int convert_cp(UChar32* pcp, zend_string *string_codepoint, zend_long int_codepoint) {
13 if (string_codepoint != NULL) {
14 int32_t i = 0;
15 size_t string_codepoint_length = ZSTR_LEN(string_codepoint);
16
17 if (ZEND_SIZE_T_INT_OVFL(string_codepoint_length)) {
19 intl_error_set_custom_msg(NULL, "Input string is too long.", 0);
20 return FAILURE;
21 }
22
23 U8_NEXT(ZSTR_VAL(string_codepoint), i, string_codepoint_length, int_codepoint);
24 if ((size_t)i != string_codepoint_length) {
26 intl_error_set_custom_msg(NULL, "Passing a UTF-8 character for codepoint requires a string which is exactly one UTF-8 codepoint long.", 0);
27 return FAILURE;
28 }
29 }
30
31 if ((int_codepoint < UCHAR_MIN_VALUE) || (int_codepoint > UCHAR_MAX_VALUE)) {
33 intl_error_set_custom_msg(NULL, "Codepoint out of range", 0);
34 return FAILURE;
35 }
36 *pcp = (UChar32)int_codepoint;
37 return SUCCESS;
38}
39
40static zend_never_inline int parse_code_point_param(INTERNAL_FUNCTION_PARAMETERS, UChar32 *cp) {
41 zend_string *string_codepoint;
42 zend_long int_codepoint = 0;
44 Z_PARAM_STR_OR_LONG(string_codepoint, int_codepoint)
46 return convert_cp(cp, string_codepoint, int_codepoint);
47}
48
49/* {{{ Converts a numeric codepoint to UTF-8
50 * Acts as an identify function when given a valid UTF-8 encoded codepoint
51 */
53 UChar32 cp;
54 char buffer[5];
55 int buffer_len = 0;
56
57 if (parse_code_point_param(INTERNAL_FUNCTION_PARAM_PASSTHRU, &cp) == FAILURE) {
59 }
60
61 /* We can use unsafe because we know the codepoint is in valid range
62 * and that 4 bytes is enough for any unicode point
63 */
64 U8_APPEND_UNSAFE(buffer, buffer_len, cp);
65 buffer[buffer_len] = 0;
66 RETURN_STRINGL(buffer, buffer_len);
67}
68/* }}} */
69
70/* {{{ Converts a UTf-8 encoded codepoint to its integer U32 value
71 * Acts as an identity function when passed a valid integer codepoint
72 */
74 UChar32 cp;
75
76 if (parse_code_point_param(INTERNAL_FUNCTION_PARAM_PASSTHRU, &cp) == FAILURE) {
78 }
79
81}
82/* }}} */
83
84/* {{{ */
85IC_METHOD(hasBinaryProperty) {
86 UChar32 cp;
87 zend_long prop;
88 zend_string *string_codepoint;
89 zend_long int_codepoint = 0;
90
92 Z_PARAM_STR_OR_LONG(string_codepoint, int_codepoint)
93 Z_PARAM_LONG(prop)
95
96 if (convert_cp(&cp, string_codepoint, int_codepoint) == FAILURE) {
98 }
99
100 RETURN_BOOL(u_hasBinaryProperty(cp, (UProperty)prop));
101}
102/* }}} */
103
104/* {{{ */
105IC_METHOD(getIntPropertyValue) {
106 UChar32 cp;
107 zend_long prop;
108 zend_string *string_codepoint;
109 zend_long int_codepoint = 0;
110
112 Z_PARAM_STR_OR_LONG(string_codepoint, int_codepoint)
113 Z_PARAM_LONG(prop)
115
116 if (convert_cp(&cp, string_codepoint, int_codepoint) == FAILURE) {
117 RETURN_NULL();
118 }
119
120 RETURN_LONG(u_getIntPropertyValue(cp, (UProperty)prop));
121}
122/* }}} */
123
124/* {{{ */
125IC_METHOD(getIntPropertyMinValue) {
126 zend_long prop;
127
129 Z_PARAM_LONG(prop)
131
132 RETURN_LONG(u_getIntPropertyMinValue((UProperty)prop));
133}
134/* }}} */
135
136/* {{{ */
137IC_METHOD(getIntPropertyMaxValue) {
138 zend_long prop;
139
141 Z_PARAM_LONG(prop)
143
144 RETURN_LONG(u_getIntPropertyMaxValue((UProperty)prop));
145}
146/* }}} */
147
148/* {{{ */
149IC_METHOD(getNumericValue) {
150 UChar32 cp;
151
152 if (parse_code_point_param(INTERNAL_FUNCTION_PARAM_PASSTHRU, &cp) == FAILURE) {
153 RETURN_NULL();
154 }
155
156 RETURN_DOUBLE(u_getNumericValue(cp));
157}
158/* }}} */
159
160/* {{{ */
165static UBool enumCharType_callback(enumCharType_data *context,
166 UChar32 start, UChar32 limit, UCharCategory type) {
167 zval retval;
168 zval args[3];
169
171 /* Note that $start is INclusive, while $limit is EXclusive
172 * Therefore (0, 32, 15) means CPs 0..31 are of type 15
173 */
174 ZVAL_LONG(&args[0], start);
175 ZVAL_LONG(&args[1], limit);
176 ZVAL_LONG(&args[2], type);
177
178 context->fci.retval = &retval;
179 context->fci.param_count = 3;
180 context->fci.params = args;
181
182 if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
184 intl_errors_set_custom_msg(NULL, "enumCharTypes callback failed", 0);
186 return 0;
187 }
189 return 1;
190}
191IC_METHOD(enumCharTypes) {
193
195 Z_PARAM_FUNC(context.fci, context.fci_cache)
197 u_enumCharTypes((UCharEnumTypeRange*)enumCharType_callback, &context);
198}
199/* }}} */
200
201/* {{{ */
202IC_METHOD(getBlockCode) {
203 UChar32 cp;
204
205 if (parse_code_point_param(INTERNAL_FUNCTION_PARAM_PASSTHRU, &cp) == FAILURE) {
206 RETURN_NULL();
207 }
208
209 RETURN_LONG(ublock_getCode(cp));
210}
211/* }}} */
212
213/* {{{ */
214IC_METHOD(charName) {
215 UChar32 cp;
216 zend_string *string_codepoint;
217 zend_long int_codepoint = 0;
218 UErrorCode error = U_ZERO_ERROR;
219 zend_long nameChoice = U_UNICODE_CHAR_NAME;
221 int32_t buffer_len;
222
224 Z_PARAM_STR_OR_LONG(string_codepoint, int_codepoint)
226 Z_PARAM_LONG(nameChoice)
228
229 if (convert_cp(&cp, string_codepoint, int_codepoint) == FAILURE) {
230 RETURN_NULL();
231 }
232
233 buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, NULL, 0, &error);
234 buffer = zend_string_alloc(buffer_len, 0);
236 buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, ZSTR_VAL(buffer), ZSTR_LEN(buffer) + 1, &error);
237 if (U_FAILURE(error)) {
238 zend_string_efree(buffer);
239 INTL_CHECK_STATUS_OR_NULL(error, "Failure getting character name");
240 }
242}
243/* }}} */
244
245/* {{{ */
246IC_METHOD(charFromName) {
247 char *name;
248 size_t name_len;
249 zend_long nameChoice = U_UNICODE_CHAR_NAME;
250 UChar32 ret;
251 UErrorCode error = U_ZERO_ERROR;
252
254 Z_PARAM_STRING(name, name_len)
256 Z_PARAM_LONG(nameChoice)
258
259 ret = u_charFromName((UCharNameChoice)nameChoice, name, &error);
262}
263/* }}} */
264
265/* {{{ void void IntlChar::enumCharNames(int|string $start, int|string $limit, callable $callback, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
270static UBool enumCharNames_callback(enumCharNames_data *context,
271 UChar32 code, UCharNameChoice nameChoice,
272 const char *name, int32_t length) {
273 zval retval;
274 zval args[3];
275
277 ZVAL_LONG(&args[0], code);
278 ZVAL_LONG(&args[1], nameChoice);
279 ZVAL_STRINGL(&args[2], name, length);
280
281 context->fci.retval = &retval;
282 context->fci.param_count = 3;
283 context->fci.params = args;
284
285 if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
287 intl_error_set_custom_msg(NULL, "enumCharNames callback failed", 0);
289 zval_ptr_dtor_str(&args[2]);
290 return 0;
291 }
293 zval_ptr_dtor_str(&args[2]);
294 return 1;
295}
296IC_METHOD(enumCharNames) {
297 UChar32 start, limit;
298 zend_string *string_start, *string_limit;
299 zend_long int_start = 0, int_limit = 0;
301 zend_long nameChoice = U_UNICODE_CHAR_NAME;
302 UErrorCode error = U_ZERO_ERROR;
303
304
306 Z_PARAM_STR_OR_LONG(string_start, int_start)
307 Z_PARAM_STR_OR_LONG(string_limit, int_limit)
308 Z_PARAM_FUNC(context.fci, context.fci_cache)
310 Z_PARAM_LONG(nameChoice)
312
313 if (convert_cp(&start, string_start, int_start) == FAILURE || convert_cp(&limit, string_limit, int_limit) == FAILURE) {
315 }
316
317 u_enumCharNames(start, limit, (UEnumCharNamesFn*)enumCharNames_callback, &context, nameChoice, &error);
320}
321/* }}} */
322
323/* {{{ */
324IC_METHOD(getPropertyName) {
326 zend_long nameChoice = U_LONG_PROPERTY_NAME;
327 const char *ret;
328
332 Z_PARAM_LONG(nameChoice)
334
335 ret = u_getPropertyName((UProperty)property, (UPropertyNameChoice)nameChoice);
336 if (ret) {
338 } else {
340 intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
342 }
343}
344/* }}} */
345
346/* {{{ */
347IC_METHOD(getPropertyEnum) {
348 char *alias;
349 size_t alias_len;
350
352 Z_PARAM_STRING(alias, alias_len)
354
355 RETURN_LONG(u_getPropertyEnum(alias));
356}
357/* }}} */
358
359/* {{{ */
360IC_METHOD(getPropertyValueName) {
361 zend_long property, value, nameChoice = U_LONG_PROPERTY_NAME;
362 const char *ret;
363
368 Z_PARAM_LONG(nameChoice)
370
371 ret = u_getPropertyValueName((UProperty)property, value, (UPropertyNameChoice)nameChoice);
372 if (ret) {
374 } else {
376 intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
378 }
379}
380/* }}} */
381
382/* {{{ */
383IC_METHOD(getPropertyValueEnum) {
385 char *name;
386 size_t name_len;
387
390 Z_PARAM_STRING(name, name_len)
392
393 RETURN_LONG(u_getPropertyValueEnum((UProperty)property, name));
394}
395/* }}} */
396
397/* {{{ */
398IC_METHOD(foldCase) {
399 UChar32 cp, ret;
400 zend_long options = U_FOLD_CASE_DEFAULT;
401 zend_string *string_codepoint;
402 zend_long int_codepoint = 0;
403
405 Z_PARAM_STR_OR_LONG(string_codepoint, int_codepoint)
409
410 if (convert_cp(&cp, string_codepoint, int_codepoint) == FAILURE) {
411 RETURN_NULL();
412 }
413
414 ret = u_foldCase(cp, options);
415 if (string_codepoint != NULL) {
416 char buffer[5];
417 int buffer_len = 0;
418 U8_APPEND_UNSAFE(buffer, buffer_len, ret);
419 buffer[buffer_len] = 0;
420 RETURN_STRINGL(buffer, buffer_len);
421 } else {
423 }
424}
425/* }}} */
426
427/* {{{ */
428IC_METHOD(digit) {
429 UChar32 cp;
430 zend_long radix = 10;
431 int ret;
432 zend_string *string_codepoint;
433 zend_long int_codepoint = 0;
434
436 Z_PARAM_STR_OR_LONG(string_codepoint, int_codepoint)
438 Z_PARAM_LONG(radix)
440
441 if (convert_cp(&cp, string_codepoint, int_codepoint) == FAILURE) {
442 RETURN_NULL();
443 }
444
445 ret = u_digit(cp, radix);
446 if (ret < 0) {
448 intl_error_set_custom_msg(NULL, "Invalid digit", 0);
450 }
452}
453/* }}} */
454
455/* {{{ */
456IC_METHOD(forDigit) {
457 zend_long digit, radix = 10;
458
460 Z_PARAM_LONG(digit)
462 Z_PARAM_LONG(radix)
464
465 RETURN_LONG(u_forDigit(digit, radix));
466}
467/* }}} */
468
469/* {{{ */
470IC_METHOD(charAge) {
471 UChar32 cp;
472 UVersionInfo version;
473 int i;
474
475 if (parse_code_point_param(INTERNAL_FUNCTION_PARAM_PASSTHRU, &cp) == FAILURE) {
476 RETURN_NULL();
477 }
478
479 u_charAge(cp, version);
481 for(i = 0; i < U_MAX_VERSION_LENGTH; ++i) {
483 }
484}
485/* }}} */
486
487/* {{{ */
488IC_METHOD(getUnicodeVersion) {
489 UVersionInfo version;
490 int i;
491
493
494 u_getUnicodeVersion(version);
496 for(i = 0; i < U_MAX_VERSION_LENGTH; ++i) {
498 }
499}
500/* }}} */
501
502/* {{{ */
503IC_METHOD(getFC_NFKC_Closure) {
504 UChar32 cp;
505 UChar *closure;
506 zend_string *u8str;
507 int32_t closure_len;
508 UErrorCode error = U_ZERO_ERROR;
509
510 if (parse_code_point_param(INTERNAL_FUNCTION_PARAM_PASSTHRU, &cp) == FAILURE) {
511 RETURN_NULL();
512 }
513
514 closure_len = u_getFC_NFKC_Closure(cp, NULL, 0, &error);
515 if (closure_len == 0) {
517 }
518 closure = safe_emalloc(sizeof(UChar), closure_len + 1, 0);
520 closure_len = u_getFC_NFKC_Closure(cp, closure, closure_len, &error);
521 if (U_FAILURE(error)) {
522 efree(closure);
523 INTL_CHECK_STATUS(error, "Failed getting closure");
524 }
525
527 u8str = intl_convert_utf16_to_utf8(closure, closure_len, &error);
528 INTL_CHECK_STATUS(error, "Failed converting output to UTF8");
529 efree(closure);
530 RETVAL_NEW_STR(u8str);
531}
532/* }}} */
533
534/* {{{ */
535#define IC_BOOL_METHOD_CHAR(name) \
536IC_METHOD(name) { \
537 UChar32 cp; \
538 if (parse_code_point_param(INTERNAL_FUNCTION_PARAM_PASSTHRU, &cp) == FAILURE) { \
539 RETURN_NULL(); \
540 } \
541 RETURN_BOOL(u_##name(cp)); \
542}
543IC_BOOL_METHOD_CHAR(isUAlphabetic)
544IC_BOOL_METHOD_CHAR(isULowercase)
545IC_BOOL_METHOD_CHAR(isUUppercase)
546IC_BOOL_METHOD_CHAR(isUWhiteSpace)
547IC_BOOL_METHOD_CHAR(islower)
548IC_BOOL_METHOD_CHAR(isupper)
549IC_BOOL_METHOD_CHAR(istitle)
550IC_BOOL_METHOD_CHAR(isdigit)
551IC_BOOL_METHOD_CHAR(isalpha)
552IC_BOOL_METHOD_CHAR(isalnum)
553IC_BOOL_METHOD_CHAR(isxdigit)
554IC_BOOL_METHOD_CHAR(ispunct)
555IC_BOOL_METHOD_CHAR(isgraph)
556IC_BOOL_METHOD_CHAR(isblank)
557IC_BOOL_METHOD_CHAR(isdefined)
558IC_BOOL_METHOD_CHAR(isspace)
559IC_BOOL_METHOD_CHAR(isJavaSpaceChar)
560IC_BOOL_METHOD_CHAR(isWhitespace)
561IC_BOOL_METHOD_CHAR(iscntrl)
562IC_BOOL_METHOD_CHAR(isISOControl)
563IC_BOOL_METHOD_CHAR(isprint)
565IC_BOOL_METHOD_CHAR(isMirrored)
566IC_BOOL_METHOD_CHAR(isIDStart)
567IC_BOOL_METHOD_CHAR(isIDPart)
568IC_BOOL_METHOD_CHAR(isIDIgnorable)
569IC_BOOL_METHOD_CHAR(isJavaIDStart)
570IC_BOOL_METHOD_CHAR(isJavaIDPart)
571#undef IC_BOOL_METHOD_CHAR
572/* }}} */
573
574/* {{{ */
575#define IC_INT_METHOD_CHAR(name) \
576IC_METHOD(name) { \
577 UChar32 cp; \
578 if (parse_code_point_param(INTERNAL_FUNCTION_PARAM_PASSTHRU, &cp) == FAILURE) { \
579 RETURN_NULL(); \
580 } \
581 RETURN_LONG(u_##name(cp)); \
582}
583IC_INT_METHOD_CHAR(charDirection)
584IC_INT_METHOD_CHAR(charType)
585IC_INT_METHOD_CHAR(getCombiningClass)
586IC_INT_METHOD_CHAR(charDigitValue)
587#undef IC_INT_METHOD_CHAR
588/* }}} */
589
590/* {{{ Returns a utf-8 character if codepoint was passed as a utf-8 sequence
591 * Returns an int otherwise
592 */
593#define IC_CHAR_METHOD_CHAR(name) \
594IC_METHOD(name) { \
595 UChar32 cp, ret; \
596 zend_string *string_codepoint; \
597 zend_long int_codepoint = -1; \
598 ZEND_PARSE_PARAMETERS_START(1, 1) \
599 Z_PARAM_STR_OR_LONG(string_codepoint, int_codepoint) \
600 ZEND_PARSE_PARAMETERS_END(); \
601 if (convert_cp(&cp, string_codepoint, int_codepoint) == FAILURE) { \
602 RETURN_NULL(); \
603 } \
604 ret = u_##name(cp); \
605 if (string_codepoint != NULL) { \
606 char buffer[5]; \
607 int buffer_len = 0; \
608 U8_APPEND_UNSAFE(buffer, buffer_len, ret); \
609 buffer[buffer_len] = 0; \
610 RETURN_STRINGL(buffer, buffer_len); \
611 } else { \
612 RETURN_LONG(ret); \
613 } \
614}
615IC_CHAR_METHOD_CHAR(charMirror)
616IC_CHAR_METHOD_CHAR(tolower)
617IC_CHAR_METHOD_CHAR(toupper)
618IC_CHAR_METHOD_CHAR(totitle)
619#if U_ICU_VERSION_MAJOR_NUM >= 52
620IC_CHAR_METHOD_CHAR(getBidiPairedBracket)
621#endif /* ICU >= 52 */
622#undef IC_CHAR_METHOD_CHAR
623/* }}} */
624
626 register_class_IntlChar();
627
628 return SUCCESS;
629}
ord(string $character)
chr(int $codepoint)
const U_INTERNAL_PROGRAM_ERROR
const U_ILLEGAL_ARGUMENT_ERROR
const U_ZERO_ERROR
error($message)
Definition ext_skel.php:22
zend_ffi_type * type
Definition ffi.c:3812
buf start
Definition ffi.c:4687
#define NULL
Definition gdcache.h:45
#define SUCCESS
Definition hash_sha3.c:261
zend_string * intl_convert_utf16_to_utf8(const UChar *src, int32_t src_len, UErrorCode *status)
#define INTL_CHECK_STATUS(err, msg)
Definition intl_data.h:47
#define INTL_CHECK_STATUS_OR_NULL(err, msg)
Definition intl_data.h:56
void intl_errors_set_custom_msg(intl_error *err, const char *msg, int copyMsg)
Definition intl_error.c:187
void intl_error_set_code(intl_error *err, UErrorCode err_code)
Definition intl_error.c:141
void intl_error_set_custom_msg(intl_error *err, const char *msg, int copyMsg)
Definition intl_error.c:90
PHP_JSON_API size_t int options
Definition php_json.h:102
zend_fcall_info_cache fci_cache
Definition uchar.c:268
zend_fcall_info fci
Definition uchar.c:267
zend_fcall_info_cache fci_cache
Definition uchar.c:163
zend_fcall_info fci
Definition uchar.c:162
Definition file.h:177
Definition dce.c:49
#define IC_INT_METHOD_CHAR(name)
Definition uchar.c:575
#define IC_METHOD(mname)
Definition uchar.c:10
int php_uchar_minit(INIT_FUNC_ARGS)
Definition uchar.c:625
struct _enumCharNames_data enumCharNames_data
struct _enumCharType_data enumCharType_data
#define IC_BOOL_METHOD_CHAR(name)
Definition uchar.c:535
#define IC_CHAR_METHOD_CHAR(name)
Definition uchar.c:593
#define INTERNAL_FUNCTION_PARAMETERS
Definition zend.h:49
#define INTERNAL_FUNCTION_PARAM_PASSTHRU
Definition zend.h:50
ZEND_API zend_result add_next_index_long(zval *arg, zend_long n)
Definition zend_API.c:2132
#define Z_PARAM_FUNC(dest_fci, dest_fcc)
Definition zend_API.h:1824
struct _zend_fcall_info_cache zend_fcall_info_cache
#define RETURN_STRING(s)
Definition zend_API.h:1043
#define RETURN_STRINGL(s, l)
Definition zend_API.h:1044
#define ZEND_PARSE_PARAMETERS_END()
Definition zend_API.h:1641
#define RETURN_FALSE
Definition zend_API.h:1058
#define RETURN_DOUBLE(d)
Definition zend_API.h:1038
#define ZEND_PARSE_PARAMETERS_NONE()
Definition zend_API.h:1623
#define RETURN_NULL()
Definition zend_API.h:1036
#define RETVAL_NEW_STR(s)
Definition zend_API.h:1015
#define Z_PARAM_OPTIONAL
Definition zend_API.h:1667
#define Z_PARAM_STRING(dest, dest_len)
Definition zend_API.h:2071
#define ZEND_PARSE_PARAMETERS_END_EX(failure)
Definition zend_API.h:1630
#define ZEND_PARSE_PARAMETERS_START(min_num_args, max_num_args)
Definition zend_API.h:1620
#define Z_PARAM_LONG(dest)
Definition zend_API.h:1896
#define RETURN_LONG(l)
Definition zend_API.h:1037
#define RETURN_BOOL(b)
Definition zend_API.h:1035
#define RETURN_NEW_STR(s)
Definition zend_API.h:1041
struct _zend_fcall_info zend_fcall_info
#define Z_PARAM_STR_OR_LONG(dest_str, dest_long)
Definition zend_API.h:2165
#define RETURN_EMPTY_STRING()
Definition zend_API.h:1047
#define ZVAL_STRINGL(z, s, l)
Definition zend_API.h:952
ZEND_API zend_result zend_call_function(zend_fcall_info *fci, zend_fcall_info_cache *fci_cache)
#define RETURN_TRUE
Definition zend_API.h:1059
#define array_init(arg)
Definition zend_API.h:537
#define efree(ptr)
Definition zend_alloc.h:155
#define safe_emalloc(nmemb, size, offset)
Definition zend_alloc.h:154
struct _zval_struct zval
zval * args
int32_t zend_long
Definition zend_long.h:42
struct _zend_string zend_string
#define INIT_FUNC_ARGS
#define zend_never_inline
#define ZEND_SIZE_T_INT_OVFL(size)
#define ZSTR_VAL(zstr)
Definition zend_string.h:68
#define ZSTR_LEN(zstr)
Definition zend_string.h:69
#define ZVAL_NULL(z)
#define ZVAL_LONG(z, l)
@ FAILURE
Definition zend_types.h:61
ZEND_API void zval_ptr_dtor(zval *zval_ptr)
zval retval
zval * return_value
zend_string * name
zval * ret
value
property