php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
mbfl_encoding.c
Go to the documentation of this file.
1/*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24/*
25 * The source code included in this file was separated from mbfilter.c
26 * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
27 * mbfilter.c is included in this package.
28 *
29 */
30
31#include "libmbfl/config.h"
32
33#ifdef HAVE_STRINGS_H
34 /* For strcasecmp */
35 #include <strings.h>
36#endif
37
38#include "mbfl_encoding.h"
39#include "mbfilter_pass.h"
40#include "mbfilter_8bit.h"
41
56
57#ifndef HAVE_STRCASECMP
58#ifdef HAVE_STRICMP
59#define strcasecmp stricmp
60#endif
61#endif
62
63
64static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
144 NULL
145};
146
147/* The following perfect hashing table was amended from gperf, and hashing code was generated using gperf.
148 * The table was amended to refer to the table above such that it is lighter for the data cache.
149 * You can use the generate_name_perfect_hash_table.php script to help generate the necessary lookup tables. */
150
/* Slot table for the perfect hash on canonical encoding names.
 * The array index is the hash value; the stored value is used by the name
 * lookup as an offset into mbfl_encoding_ptr_list, and -1 marks a hash value
 * that no canonical name produces. The trailing comment on each row gives the
 * index of the row's first entry. */
static const int8_t mbfl_encoding_ptr_list_after_hashing[] = {
	-1, -1, -1, -1, -1, -1, 66, -1, 73, -1, /*   0 */
	78, 61, 76, -1, 59, 46, 52, 54, 49, 57, /*  10 */
	69, 21, 50, 58, 75, 35,  9, 64, 48, 56, /*  20 */
	74, 47, 55, 40, 45, 53, 18, 39, 72, 60, /*  30 */
	23, 10, 30, 36, 67, 71, 37, 27, 77, 26, /*  40 */
	51, 12,  6, 11,  7, 29,  5, 24,  0,  2, /*  50 */
	13, 43, 31, 33, 38, 63,  8,  1, 15, -1, /*  60 */
	16, -1, 14,  3, 44, -1, 20, -1, 32, -1, /*  70 */
	68, 25, 17, 28, -1, -1, -1, 22, -1, -1, /*  80 */
	 4, -1, -1, 62, -1, -1, 34, -1, 41, -1, /*  90 */
	-1, -1, 42, 70, 19, -1, -1, -1, 65      /* 100 */
};
249
/* Computes the perfect-hash key of an encoding name.
 *
 * The key is the length plus the weights of the bytes at positions 6, 5, 4,
 * 2 and 0 (each only when the name is long enough to have that position),
 * plus the weight of the final byte.
 *
 * str: name bytes; at least `len` bytes must be readable.
 * len: number of bytes in `str`. The final byte is read as str[len - 1], so
 *      `len` must be at least 1; the lookup in this file only calls this with
 *      lengths between NAME_HASH_MIN_NAME_LENGTH and NAME_HASH_MAX_NAME_LENGTH.
 *
 * Returns the key; the caller bounds-checks it before indexing the slot table. */
static unsigned int mbfl_name2encoding_perfect_hash(const char *str, size_t len)
{
	/* Weight of every possible byte value. Upper- and lower-case ASCII
	 * letters carry the same weight, so the key does not depend on case. */
	static const unsigned char char_weight[] = {
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109,   1, 109, 109,   1,  19,
		  0,  16,  13,   3,   7,  35,   1,  20, 109, 109,
		109, 109, 109, 109, 109,  16,   1,   0,  44,   6,
		 26,  53,   8,   0,  25,  32,  13,  12,   1,   0,
		 25,   0,  32,  18,  51,   3, 109,  15, 109, 109,
		  1, 109, 109, 109, 109, 109, 109,  16,   1,   0,
		 44,   6,  26,  53,   8,   0,  25,  32,  13,  12,
		  1,   0,  25,   0,  32,  18,  51,   3, 109,  15,
		109, 109,   1, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
		109, 109, 109, 109, 109, 109
	};
	/* Byte positions that contribute to the key, besides the final byte. */
	static const unsigned char sampled_pos[] = { 6, 5, 4, 2, 0 };

	unsigned int key = len;
	/* Highest valid index, computed in unsigned arithmetic on purpose: for a
	 * zero length it wraps to UINT_MAX, so every position below is sampled. */
	const unsigned int last = key - 1u;

	for (size_t i = 0; i < sizeof(sampled_pos) / sizeof(sampled_pos[0]); i++) {
		const unsigned int pos = sampled_pos[i];
		if (last >= pos) {
			key += char_weight[(unsigned char)str[pos]];
		}
	}

	return key + char_weight[(unsigned char)str[len - 1]];
}
305
306#define NAME_HASH_MIN_NAME_LENGTH 2
307#define NAME_HASH_MAX_NAME_LENGTH 23
308
310{
312}
313
314const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
315{
316 const mbfl_encoding *const *encoding;
317
318 /* Sanity check perfect hash for name.
319 * Never enable this in production, this is only a development-time sanity check! */
320#if ZEND_DEBUG && 0
321 for (encoding = mbfl_encoding_ptr_list; *encoding; encoding++) {
322 size_t name_length = strlen((*encoding)->name);
323 if (!(name_length <= NAME_HASH_MAX_NAME_LENGTH && name_length >= NAME_HASH_MIN_NAME_LENGTH)) {
324 fprintf(stderr, "name length is not satisfying bound check: %zu %s\n", name_length, (*encoding)->name);
325 abort();
326 }
327 unsigned int key = mbfl_name2encoding_perfect_hash((*encoding)->name, name_length);
328 if (mbfl_encoding_ptr_list[mbfl_encoding_ptr_list_after_hashing[key]] != *encoding) {
329 fprintf(stderr, "mbfl_name2encoding_perfect_hash: key %u %s mismatch\n", key, (*encoding)->name);
330 abort();
331 }
332 }
333#endif
334
335 /* Use perfect hash lookup for name */
336 if (name_len <= NAME_HASH_MAX_NAME_LENGTH && name_len >= NAME_HASH_MIN_NAME_LENGTH) {
337 unsigned int key = mbfl_name2encoding_perfect_hash(name, name_len);
338 if (key < sizeof(mbfl_encoding_ptr_list_after_hashing) / sizeof(mbfl_encoding_ptr_list_after_hashing[0])) {
339 int8_t offset = mbfl_encoding_ptr_list_after_hashing[key];
340 if (offset >= 0) {
341 encoding = mbfl_encoding_ptr_list + offset;
342 if (strncasecmp((*encoding)->name, name, name_len) == 0) {
343 return *encoding;
344 }
345 }
346 }
347 }
348
349 /* search MIME charset name */
350 for (encoding = mbfl_encoding_ptr_list; *encoding; encoding++) {
351 if ((*encoding)->mime_name) {
352 if (strncasecmp((*encoding)->mime_name, name, name_len) == 0 && (*encoding)->mime_name[name_len] == '\0') {
353 return *encoding;
354 }
355 }
356 }
357
358 /* search aliases */
359 for (encoding = mbfl_encoding_ptr_list; *encoding; encoding++) {
360 if ((*encoding)->aliases) {
361 for (const char **alias = (*encoding)->aliases; *alias; alias++) {
362 if (strncasecmp(name, *alias, name_len) == 0 && (*alias)[name_len] == '\0') {
363 return *encoding;
364 }
365 }
366 }
367 }
368
369 return NULL;
370}
371
373{
374 const mbfl_encoding **encoding;
375
376 for (encoding = mbfl_encoding_ptr_list; *encoding; encoding++) {
377 if ((*encoding)->no_encoding == no_encoding) {
378 return *encoding;
379 }
380 }
381
382 return NULL;
383}
384
385const char *mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding)
386{
387 const mbfl_encoding *encoding = mbfl_no2encoding(no_encoding);
388 return encoding ? encoding->name : "";
389}
390
392{
393 return mbfl_encoding_ptr_list;
394}
395
397{
398 if (encoding->mime_name && encoding->mime_name[0] != '\0') {
399 return encoding->mime_name;
400 }
401 return NULL;
402}
size_t len
Definition apprentice.c:174
fprintf($stream, string $format, mixed ... $values)
zend_long offset
#define NULL
Definition gdcache.h:45
const mbfl_encoding mbfl_encoding_7bit
const mbfl_encoding mbfl_encoding_8bit
const mbfl_encoding mbfl_encoding_base64
const mbfl_encoding mbfl_encoding_hz
const mbfl_encoding mbfl_encoding_sjis
const mbfl_encoding mbfl_encoding_sjiswin
const mbfl_encoding mbfl_encoding_cp932
const mbfl_encoding mbfl_encoding_uhc
const mbfl_encoding mbfl_encoding_euc_kr
const mbfl_encoding mbfl_encoding_cp50221
const mbfl_encoding mbfl_encoding_gb18030_2022
const mbfl_encoding mbfl_encoding_sjis2004
const mbfl_encoding mbfl_encoding_sjis_docomo
const mbfl_encoding mbfl_encoding_euc_cn
const mbfl_encoding mbfl_encoding_cp50220
const mbfl_encoding mbfl_encoding_big5
const mbfl_encoding mbfl_encoding_cp50222
const mbfl_encoding mbfl_encoding_2022jpms
const mbfl_encoding mbfl_encoding_cp51932
const mbfl_encoding mbfl_encoding_2022jp_2004
const mbfl_encoding mbfl_encoding_sjis_sb
const mbfl_encoding mbfl_encoding_cp936
const mbfl_encoding mbfl_encoding_gb18030
const mbfl_encoding mbfl_encoding_eucjp2004
const mbfl_encoding mbfl_encoding_jis
const mbfl_encoding mbfl_encoding_2022jp
const mbfl_encoding mbfl_encoding_2022jp_kddi
const mbfl_encoding mbfl_encoding_eucjp_win
const mbfl_encoding mbfl_encoding_euc_tw
const mbfl_encoding mbfl_encoding_cp950
const mbfl_encoding mbfl_encoding_euc_jp
const mbfl_encoding mbfl_encoding_sjis_kddi
const mbfl_encoding mbfl_encoding_2022kr
const mbfl_encoding mbfl_encoding_sjis_mac
const mbfl_encoding mbfl_encoding_html_ent
const mbfl_encoding mbfl_encoding_qprint
const mbfl_encoding mbfl_encoding_8859_10
const mbfl_encoding mbfl_encoding_8859_6
const mbfl_encoding mbfl_encoding_8859_16
const mbfl_encoding mbfl_encoding_8859_8
const mbfl_encoding mbfl_encoding_8859_2
const mbfl_encoding mbfl_encoding_8859_5
const mbfl_encoding mbfl_encoding_cp850
const mbfl_encoding mbfl_encoding_8859_13
const mbfl_encoding mbfl_encoding_8859_4
const mbfl_encoding mbfl_encoding_cp1252
const mbfl_encoding mbfl_encoding_8859_7
const mbfl_encoding mbfl_encoding_armscii8
const mbfl_encoding mbfl_encoding_koi8u
const mbfl_encoding mbfl_encoding_cp1251
const mbfl_encoding mbfl_encoding_8859_1
const mbfl_encoding mbfl_encoding_koi8r
const mbfl_encoding mbfl_encoding_8859_15
const mbfl_encoding mbfl_encoding_cp866
const mbfl_encoding mbfl_encoding_8859_3
const mbfl_encoding mbfl_encoding_ascii
const mbfl_encoding mbfl_encoding_cp1254
const mbfl_encoding mbfl_encoding_8859_14
const mbfl_encoding mbfl_encoding_8859_9
const mbfl_encoding mbfl_encoding_ucs2
const mbfl_encoding mbfl_encoding_ucs2be
const mbfl_encoding mbfl_encoding_ucs2le
const mbfl_encoding mbfl_encoding_ucs4be
const mbfl_encoding mbfl_encoding_ucs4
const mbfl_encoding mbfl_encoding_ucs4le
const mbfl_encoding mbfl_encoding_utf16be
const mbfl_encoding mbfl_encoding_utf16le
const mbfl_encoding mbfl_encoding_utf16
const mbfl_encoding mbfl_encoding_utf32be
const mbfl_encoding mbfl_encoding_utf32
const mbfl_encoding mbfl_encoding_utf32le
const mbfl_encoding mbfl_encoding_utf7
const mbfl_encoding mbfl_encoding_utf7imap
const mbfl_encoding mbfl_encoding_utf8_kddi_b
const mbfl_encoding mbfl_encoding_utf8_sb
const mbfl_encoding mbfl_encoding_utf8_kddi_a
const mbfl_encoding mbfl_encoding_utf8
const mbfl_encoding mbfl_encoding_utf8_docomo
const mbfl_encoding mbfl_encoding_uuencode
const mbfl_encoding ** mbfl_get_supported_encodings(void)
const mbfl_encoding * mbfl_name2encoding_ex(const char *name, size_t name_len)
const mbfl_encoding * mbfl_name2encoding(const char *name)
#define NAME_HASH_MIN_NAME_LENGTH
const mbfl_encoding * mbfl_no2encoding(enum mbfl_no_encoding no_encoding)
const char * mbfl_encoding_preferred_mime_name(const mbfl_encoding *encoding)
const char * mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding)
mbfl_no_encoding
#define abort()
unsigned char key[REFLECTION_KEY_LEN]
xmlCharEncodingHandlerPtr encoding
Definition php_soap.h:170
strlen(string $string)
#define strncasecmp(s1, s2, n)
#define ZEND_FALLTHROUGH
zend_string * name