php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
php_unicode.h
Go to the documentation of this file.
1/*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Wez Furlong (wez@thebrainroom.com) |
14 +----------------------------------------------------------------------+
15
16 Based on code from ucdata-2.5, which has the following Copyright:
17
18 Copyright 2001 Computing Research Labs, New Mexico State University
19
20 Permission is hereby granted, free of charge, to any person obtaining a
21 copy of this software and associated documentation files (the "Software"),
22 to deal in the Software without restriction, including without limitation
23 the rights to use, copy, modify, merge, publish, distribute, sublicense,
24 and/or sell copies of the Software, and to permit persons to whom the
25 Software is furnished to do so, subject to the following conditions:
26
27 The above copyright notice and this permission notice shall be included in
28 all copies or substantial portions of the Software.
29*/
30
31#ifndef PHP_UNICODE_H
32#define PHP_UNICODE_H
33
/*
 * Property identifiers. These are the values handed to
 * php_unicode_is_prop1() as `prop` and listed in the variadic part of
 * php_unicode_is_prop() by the macros further down in this header.
 *
 * 0..20 are named after general categories (marks, numbers, separators,
 * "other", letters, symbols).
 */
34#define UC_MN 0 /* Mark, Non-Spacing */
35#define UC_MC 1 /* Mark, Spacing Combining */
36#define UC_ME 2 /* Mark, Enclosing */
37#define UC_ND 3 /* Number, Decimal Digit */
38#define UC_NL 4 /* Number, Letter */
39#define UC_NO 5 /* Number, Other */
40#define UC_ZS 6 /* Separator, Space */
41#define UC_ZL 7 /* Separator, Line */
42#define UC_ZP 8 /* Separator, Paragraph */
43#define UC_OS 9 /* Other, Surrogate */
44#define UC_CO 10 /* Other, Private Use */
45#define UC_CN 11 /* Other, Not Assigned */
46#define UC_LU 12 /* Letter, Uppercase */
47#define UC_LL 13 /* Letter, Lowercase */
48#define UC_LT 14 /* Letter, Titlecase */
49#define UC_LM 15 /* Letter, Modifier */
50#define UC_LO 16 /* Letter, Other */
51#define UC_SM 17 /* Symbol, Math */
52#define UC_SC 18 /* Symbol, Currency */
53#define UC_SK 19 /* Symbol, Modifier */
54#define UC_SO 20 /* Symbol, Other */
/*
 * 21..32 are named after directionality classes; the directionality
 * macros in this header consume all of them except UC_AL.
 */
55#define UC_L 21 /* Left-To-Right */
56#define UC_R 22 /* Right-To-Left */
57#define UC_EN 23 /* European Number */
58#define UC_ES 24 /* European Number Separator */
59#define UC_ET 25 /* European Number Terminator */
60#define UC_AN 26 /* Arabic Number */
61#define UC_CS 27 /* Common Number Separator */
62#define UC_B 28 /* Block Separator */
63#define UC_S 29 /* Segment Separator */
64#define UC_WS 30 /* Whitespace */
65#define UC_ON 31 /* Other Neutrals */
66#define UC_AL 32 /* Arabic Letter */
67
68/* Merged property categories */
/*
 * NOTE(review): "merged" presumably means each of these stands for several
 * finer-grained general categories - confirm against the property tables.
 */
69#define UC_C 33 /* Control */ /* used by php_unicode_is_cntrl() */
70#define UC_P 34 /* Punctuation */ /* used by php_unicode_is_punct(), _is_graph(), _is_print() */
71
72/* Derived properties from DerivedCoreProperties.txt */
73#define UC_CASED 35 /* used by php_unicode_is_cased() */
74#define UC_CASE_IGNORABLE 36 /* used by php_unicode_is_case_ignorable() */
75
76
/*
 * Property lookups behind every php_unicode_is_*() macro in this header.
 *
 * php_unicode_is_prop():  `code` is followed by a variadic list of UC_*
 *                         identifiers; every caller in this header closes
 *                         the list with -1.
 * php_unicode_is_prop1(): takes exactly one UC_* identifier in `prop`.
 *
 * NOTE(review): presumably these return true when `code` carries the
 * property (any one of the listed properties for the variadic form) -
 * the implementation is not part of this header, confirm there.
 */
77MBSTRING_API bool php_unicode_is_prop(unsigned long code, ...);
78MBSTRING_API bool php_unicode_is_prop1(unsigned long code, int prop);
79
91
93 php_case_mode case_mode, const char *srcstr, size_t srclen,
94 const mbfl_encoding *src_encoding, const mbfl_encoding *dst_encoding, int illegal_mode, uint32_t illegal_substchar);
95
96/* Optimize the common ASCII case for lower/upper */
97
98static inline int php_unicode_is_lower(unsigned long code) {
99 if (code < 0x80) {
100 return code >= 0x61 && code <= 0x7A;
101 } else {
102 return php_unicode_is_prop1(code, UC_LL);
103 }
104}
105
106static inline int php_unicode_is_upper(unsigned long code) {
107 if (code < 0x80) {
108 return code >= 0x41 && code <= 0x5A;
109 } else {
110 return php_unicode_is_prop1(code, UC_LU);
111 }
112}
113
/*
 * Character-class macros. Each one expands to a single call: either
 * php_unicode_is_prop1() with one UC_* identifier, or php_unicode_is_prop()
 * with a list of identifiers closed by -1.
 *
 *   alpha - the five letter properties (LU, LL, LM, LO, LT)
 *   digit - UC_ND only
 *   alnum - the alpha list plus UC_ND
 *   cntrl - merged UC_C
 *   blank - UC_ZS only
 *   punct - merged UC_P
 */
114#define php_unicode_is_alpha(cc) php_unicode_is_prop(cc, UC_LU, UC_LL, UC_LM, UC_LO, UC_LT, -1)
115#define php_unicode_is_digit(cc) php_unicode_is_prop1(cc, UC_ND)
116#define php_unicode_is_alnum(cc) php_unicode_is_prop(cc, UC_LU, UC_LL, UC_LM, UC_LO, UC_LT, UC_ND, -1)
117#define php_unicode_is_cntrl(cc) php_unicode_is_prop1(cc, UC_C)
118#define php_unicode_is_blank(cc) php_unicode_is_prop1(cc, UC_ZS)
119#define php_unicode_is_punct(cc) php_unicode_is_prop1(cc, UC_P)
/*
 * Tests `cc` against the mark, number, letter, punctuation and symbol
 * properties. Separators are not in the list, which is the only difference
 * from php_unicode_is_print() (that one adds UC_ZS).
 *
 * Each property is named exactly once; the trailing -1 closes the variadic
 * argument list of php_unicode_is_prop().
 */
#define php_unicode_is_graph(cc) php_unicode_is_prop(cc, \
	UC_MN, UC_MC, UC_ME, UC_ND, UC_NL, UC_NO, \
	UC_LU, UC_LL, UC_LT, UC_LM, UC_LO, UC_P, \
	UC_SM, UC_SC, UC_SK, UC_SO, -1)
/*
 * Tests `cc` against the mark, number, letter, punctuation and symbol
 * properties plus UC_ZS (Separator, Space). Apart from UC_ZS the list is
 * the same as the one used by php_unicode_is_graph().
 *
 * Each property is named exactly once; the trailing -1 closes the variadic
 * argument list of php_unicode_is_prop().
 */
#define php_unicode_is_print(cc) php_unicode_is_prop(cc, \
	UC_MN, UC_MC, UC_ME, UC_ND, UC_NL, UC_NO, \
	UC_LU, UC_LL, UC_LT, UC_LM, UC_LO, UC_P, \
	UC_SM, UC_SC, UC_SK, UC_SO, UC_ZS, -1)
/* Titlecase letter: UC_LT only. */
128#define php_unicode_is_title(cc) php_unicode_is_prop1(cc, UC_LT)
129
/*
 * symbol     - all four symbol properties (SM, SC, SO, SK)
 * number     - all three number properties (ND, NO, NL)
 * nonspacing - UC_MN only; expands to the same call as php_unicode_is_nsmark()
 */
130#define php_unicode_is_symbol(cc) php_unicode_is_prop(cc, UC_SM, UC_SC, UC_SO, UC_SK, -1)
131#define php_unicode_is_number(cc) php_unicode_is_prop(cc, UC_ND, UC_NO, UC_NL, -1)
132#define php_unicode_is_nonspacing(cc) php_unicode_is_prop1(cc, UC_MN)
133
134/*
135 * Directionality macros.
136 */
/*
 * rtl       - UC_R only
 * ltr       - UC_L only
 * strong    - UC_L or UC_R (UC_AL is not in the list)
 * weak      - UC_EN, UC_ES, UC_ET, UC_AN, UC_CS
 * neutral   - UC_B, UC_S, UC_WS, UC_ON
 * separator - UC_B, UC_S (a subset of the neutral list)
 */
137#define php_unicode_is_rtl(cc) php_unicode_is_prop1(cc, UC_R)
138#define php_unicode_is_ltr(cc) php_unicode_is_prop1(cc, UC_L)
139#define php_unicode_is_strong(cc) php_unicode_is_prop(cc, UC_L, UC_R, -1)
140#define php_unicode_is_weak(cc) php_unicode_is_prop(cc, UC_EN, UC_ES, UC_ET, UC_AN, UC_CS, -1)
141#define php_unicode_is_neutral(cc) php_unicode_is_prop(cc, UC_B, UC_S, UC_WS, UC_ON, -1)
142#define php_unicode_is_separator(cc) php_unicode_is_prop(cc, UC_B, UC_S, -1)
143
144/*
145 * Other macros inspired by John Cowan.
146 */
/*
 * Apart from php_unicode_is_mark(), which lists all three mark properties
 * (MN, MC, ME), every macro in this group tests exactly one UC_* identifier.
 */
147#define php_unicode_is_mark(cc) php_unicode_is_prop(cc, UC_MN, UC_MC, UC_ME, -1)
148#define php_unicode_is_modif(cc) php_unicode_is_prop1(cc, UC_LM) /* Letter, Modifier */
149#define php_unicode_is_letnum(cc) php_unicode_is_prop1(cc, UC_NL) /* Number, Letter */
150#define php_unicode_is_math(cc) php_unicode_is_prop1(cc, UC_SM) /* Symbol, Math */
151#define php_unicode_is_currency(cc) php_unicode_is_prop1(cc, UC_SC) /* Symbol, Currency */
152#define php_unicode_is_modifsymbol(cc) php_unicode_is_prop1(cc, UC_SK) /* Symbol, Modifier */
153#define php_unicode_is_nsmark(cc) php_unicode_is_prop1(cc, UC_MN) /* Mark, Non-Spacing */
154#define php_unicode_is_spmark(cc) php_unicode_is_prop1(cc, UC_MC) /* Mark, Spacing Combining */
155#define php_unicode_is_enclosing(cc) php_unicode_is_prop1(cc, UC_ME) /* Mark, Enclosing */
156#define php_unicode_is_private(cc) php_unicode_is_prop1(cc, UC_CO) /* Other, Private Use */
157#define php_unicode_is_surrogate(cc) php_unicode_is_prop1(cc, UC_OS) /* Other, Surrogate */
158#define php_unicode_is_lsep(cc) php_unicode_is_prop1(cc, UC_ZL) /* Separator, Line */
159#define php_unicode_is_psep(cc) php_unicode_is_prop1(cc, UC_ZP) /* Separator, Paragraph */
160
161/*
162 * Other miscellaneous character property macros.
163 */
/*
 * Pure range checks; no property lookup is made.
 *
 *   han    - 0x4e00..0x9fff or 0xf900..0xfaff
 *   hangul - 0xac00..0xd7ff
 *
 * `cc` appears several times in each expansion, so an argument with side
 * effects (e.g. `*p++`) would be evaluated more than once.
 *
 * NOTE(review): only these ranges are tested; whether other blocks of the
 * same scripts should match is not decided here - confirm with callers.
 */
164#define php_unicode_is_han(cc) (((cc) >= 0x4e00 && (cc) <= 0x9fff) ||\
165 ((cc) >= 0xf900 && (cc) <= 0xfaff))
166#define php_unicode_is_hangul(cc) ((cc) >= 0xac00 && (cc) <= 0xd7ff)
167
168/*
169 * Derived core properties.
170 */
171
/*
 * Single-property tests for the two derived-property identifiers,
 * UC_CASED and UC_CASE_IGNORABLE.
 */
172#define php_unicode_is_cased(cc) php_unicode_is_prop1(cc, UC_CASED)
173#define php_unicode_is_case_ignorable(cc) php_unicode_is_prop1(cc, UC_CASE_IGNORABLE)
174
175#endif /* PHP_UNICODE_H */
#define MBSTRING_API
Definition mbstring.h:39
#define UC_LU
Definition php_unicode.h:46
#define UC_LL
Definition php_unicode.h:47
MBSTRING_API zend_string * php_unicode_convert_case(php_case_mode case_mode, const char *srcstr, size_t srclen, const mbfl_encoding *src_encoding, const mbfl_encoding *dst_encoding, int illegal_mode, uint32_t illegal_substchar)
MBSTRING_API bool php_unicode_is_prop1(unsigned long code, int prop)
Definition php_unicode.c:64
MBSTRING_API bool php_unicode_is_prop(unsigned long code,...)
Definition php_unicode.c:69
php_case_mode
Definition php_unicode.h:80
@ PHP_UNICODE_CASE_TITLE
Definition php_unicode.h:83
@ PHP_UNICODE_CASE_FOLD
Definition php_unicode.h:84
@ PHP_UNICODE_CASE_LOWER_SIMPLE
Definition php_unicode.h:86
@ PHP_UNICODE_CASE_FOLD_SIMPLE
Definition php_unicode.h:88
@ PHP_UNICODE_CASE_LOWER
Definition php_unicode.h:82
@ PHP_UNICODE_CASE_UPPER_SIMPLE
Definition php_unicode.h:85
@ PHP_UNICODE_CASE_TITLE_SIMPLE
Definition php_unicode.h:87
@ PHP_UNICODE_CASE_UPPER
Definition php_unicode.h:81
@ PHP_UNICODE_CASE_MODE_MAX
Definition php_unicode.h:89
struct _zend_string zend_string