/*
 * Listing of gdkanji.c taken from php-internal-docs 8.4.8
 * (unofficial docs for php/php-src).
 */

/* gdkanji.c (Kanji code converter) */
/* written by Masahito Yamaga (ma@yama-ga.com) */

5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8#include "gd.h"
9#include "gdhelpers.h"
10
11#include <stdarg.h>
12#if defined(HAVE_ICONV_H) || defined(HAVE_ICONV)
13#include <iconv.h>
14#include <errno.h>
15#endif
16
17#if defined(HAVE_ICONV_H) && !defined(HAVE_ICONV)
18#define HAVE_ICONV 1
19#endif
20
/* Prefix used in every diagnostic emitted by this file. */
#define LIBNAME "any2eucjp()"

/*
 * SJISPRE: when defined, input that cannot be told apart as EUC or SJIS is
 * assumed to be SJIS (see DetectKanjiCode); otherwise EUC is assumed.
 * It is switched on automatically for the DOS/Windows toolchains below.
 */
#if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
#ifndef SJISPRE
#define SJISPRE 1
#endif
#endif

/* Force TRUE/FALSE to 1/0 regardless of any earlier definition. */
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif

#define TRUE 1
#define FALSE 0

/* Encoding identifiers returned by DetectKanjiCode(). */
#define NEW 1			/* JIS, announced by ESC $ B */
#define OLD 2			/* JIS, announced by ESC $ @ */
#define ESCI 3			/* ESC ( I escape sequence seen */
#define NEC 4			/* ESC K escape sequence seen */
#define EUC 5
#define SJIS 6
#define EUCORSJIS 7		/* ambiguous: could be EUC or SJIS */
#define ASCII 8			/* no multi-byte code found */

/* Encoding names passed to do_convert() (and to iconv_open() when used). */
#define NEWJISSTR "JIS7"
#define OLDJISSTR "jis"
#define EUCSTR "eucJP"
#define SJISSTR "SJIS"

#define ESC 27			/* escape byte that starts a JIS sequence */
#define SS2 142			/* 0x8E: byte that precedes half-width kana in the EUC output */

/*
 * debug() - printf-style trace helper.
 *
 * When DEBUG is defined, writes "<LIBNAME>: <formatted message>\n" to
 * stdout.  Without DEBUG the function body is empty, so calls cost nothing
 * beyond argument evaluation.
 */
static void
debug (const char *format,...)
{
#ifdef DEBUG
  va_list args;

  va_start (args, format);
  fprintf (stdout, "%s: ", LIBNAME);
  vfprintf (stdout, format, args);
  fprintf (stdout, "\n");
  va_end (args);
#else
  (void) format;		/* unused in non-debug builds */
#endif
}

70static void
71error (const char *format,...)
72{
73 va_list args;
74 char *tmp;
75
76 va_start(args, format);
77 vspprintf(&tmp, 0, format, args);
78 va_end(args);
79 php_error_docref(NULL, E_WARNING, "%s: %s", LIBNAME, tmp);
80 efree(tmp);
81}
82
83/* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */
84
85static int
86DetectKanjiCode (unsigned char *str)
87{
88 static int whatcode = ASCII;
89 int oldcode = ASCII;
90 int c, i;
91 char *lang = NULL;
92
93 c = '\1';
94 i = 0;
95
96 if (whatcode != EUCORSJIS && whatcode != ASCII)
97 {
98 oldcode = whatcode;
99 whatcode = ASCII;
100 }
101
102 while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0')
103 {
104 if ((c = str[i++]) != '\0')
105 {
106 if (c == ESC)
107 {
108 c = str[i++];
109 if (c == '$')
110 {
111 c = str[i++];
112 if (c == 'B')
113 whatcode = NEW;
114 else if (c == '@')
115 whatcode = OLD;
116 }
117 else if (c == '(')
118 {
119 c = str[i++];
120 if (c == 'I')
121 whatcode = ESCI;
122 }
123 else if (c == 'K')
124 whatcode = NEC;
125 }
126 else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
127 whatcode = SJIS;
128 else if (c == SS2)
129 {
130 c = str[i++];
131 if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160) || (c >= 224 && c <= 252))
132 whatcode = SJIS;
133 else if (c >= 161 && c <= 223)
134 whatcode = EUCORSJIS;
135 }
136 else if (c >= 161 && c <= 223)
137 {
138 c = str[i++];
139 if (c >= 240 && c <= 254)
140 whatcode = EUC;
141 else if (c >= 161 && c <= 223)
142 whatcode = EUCORSJIS;
143 else if (c >= 224 && c <= 239)
144 {
145 whatcode = EUCORSJIS;
146 while (c >= 64 && whatcode == EUCORSJIS)
147 {
148 if (c >= 129)
149 {
150 if (c <= 141 || (c >= 143 && c <= 159))
151 whatcode = SJIS;
152 else if (c >= 253 && c <= 254)
153 whatcode = EUC;
154 }
155 c = str[i++];
156 }
157 }
158 else if (c <= 159)
159 whatcode = SJIS;
160 }
161 else if (c >= 240 && c <= 254)
162 whatcode = EUC;
163 else if (c >= 224 && c <= 239)
164 {
165 c = str[i++];
166 if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
167 whatcode = SJIS;
168 else if (c >= 253 && c <= 254)
169 whatcode = EUC;
170 else if (c >= 161 && c <= 252)
171 whatcode = EUCORSJIS;
172 }
173 }
174 }
175
176#ifdef DEBUG
177 if (whatcode == ASCII)
178 debug ("Kanji code not included.");
179 else if (whatcode == EUCORSJIS)
180 debug ("Kanji code not detected.");
181 else
182 debug ("Kanji code detected at %d byte.", i);
183#endif
184
185 if (whatcode == EUCORSJIS && oldcode != ASCII)
186 whatcode = oldcode;
187
188 if (whatcode == EUCORSJIS)
189 {
190 if (getenv ("LC_ALL"))
191 lang = getenv ("LC_ALL");
192 else if (getenv ("LC_CTYPE"))
193 lang = getenv ("LC_CTYPE");
194 else if (getenv ("LANG"))
195 lang = getenv ("LANG");
196
197 if (lang)
198 {
199 if (strcmp (lang, "ja_JP.SJIS") == 0 ||
200#ifdef hpux
201 strcmp (lang, "japanese") == 0 ||
202#endif
203 strcmp (lang, "ja_JP.mscode") == 0 ||
204 strcmp (lang, "ja_JP.PCK") == 0)
205 whatcode = SJIS;
206 else if (strncmp (lang, "ja", 2) == 0)
207#ifdef SJISPRE
208 whatcode = SJIS;
209#else
210 whatcode = EUC;
211#endif
212 }
213 }
214
215 if (whatcode == EUCORSJIS)
216#ifdef SJISPRE
217 whatcode = SJIS;
218#else
219 whatcode = EUC;
220#endif
221
222 return whatcode;
223}
224
/* SJIStoJIS() is sjis2jis() by Ken Lunde. */

/*
 * Convert one double-byte Shift-JIS character to its 7-bit JIS form in
 * place.  On entry *p1 / *p2 hold the SJIS lead / trail byte; on return
 * they hold the JIS first / second byte.  No validation is done: the
 * caller must pass a real SJIS double-byte pair.
 */
static void
SJIStoJIS (int *p1, int *p2)
{
  unsigned char c1 = *p1;
  unsigned char c2 = *p2;
  /* Trail bytes below 159 map to the odd row of the pair sharing this lead byte. */
  int adjust = c2 < 159;
  int rowOffset = c1 < 160 ? 112 : 176;
  int cellOffset = adjust ? (31 + (c2 > 127)) : 126;

  *p1 = ((c1 - rowOffset) << 1) - adjust;
  *p2 -= cellOffset;
}

/* han2zen() was derived from han2zen() written by Ken Lunde. */

/* Half-width kana bytes that can combine with a following voiced mark (222). */
#define IS_DAKU(c) (((c) >= 182 && (c) <= 196) || ((c) >= 202 && (c) <= 206) || ((c) == 179))
/* Half-width kana bytes that can combine with a following semi-voiced mark (223). */
#define IS_HANDAKU(c) ((c) >= 202 && (c) <= 206)

/*
 * Convert a half-width katakana byte to the Shift-JIS code of its
 * full-width equivalent, in place.
 *
 *   *p1  in:  half-width kana byte, 161..223
 *        out: SJIS lead byte of the full-width character
 *   *p2  in:  222 (voiced mark), 223 (semi-voiced mark) or anything else
 *             for "no mark"
 *        out: SJIS trail byte; a mark is folded into the character when the
 *             kana can carry it, and ignored otherwise
 *
 * If *p1 is outside 161..223 there is no table entry for it; the function
 * then returns without touching *p1 or *p2 instead of indexing outside the
 * table.
 */
static void
han2zen (int *p1, int *p2)
{
  /* Full-width SJIS {lead, trail} for half-width bytes 161..223, in order. */
  static const unsigned char mtable[][2] =
  {
    {129, 66},
    {129, 117},
    {129, 118},
    {129, 65},
    {129, 69},
    {131, 146},
    {131, 64},
    {131, 66},
    {131, 68},
    {131, 70},
    {131, 72},
    {131, 131},
    {131, 133},
    {131, 135},
    {131, 98},
    {129, 91},
    {131, 65},
    {131, 67},
    {131, 69},
    {131, 71},
    {131, 73},
    {131, 74},
    {131, 76},
    {131, 78},
    {131, 80},
    {131, 82},
    {131, 84},
    {131, 86},
    {131, 88},
    {131, 90},
    {131, 92},
    {131, 94},
    {131, 96},
    {131, 99},
    {131, 101},
    {131, 103},
    {131, 105},
    {131, 106},
    {131, 107},
    {131, 108},
    {131, 109},
    {131, 110},
    {131, 113},
    {131, 116},
    {131, 119},
    {131, 122},
    {131, 125},
    {131, 126},
    {131, 128},
    {131, 129},
    {131, 130},
    {131, 132},
    {131, 134},
    {131, 136},
    {131, 137},
    {131, 138},
    {131, 139},
    {131, 140},
    {131, 141},
    {131, 143},
    {131, 147},
    {129, 74},
    {129, 75}
  };
  const int first = 161;
  const int count = (int) (sizeof mtable / sizeof mtable[0]);
  int c = *p1;
  int daku = 0;
  int handaku = 0;

  /* Reject bytes with no table entry rather than reading out of bounds. */
  if (c < first || c >= first + count)
    return;

  if (*p2 == 222 && IS_DAKU (c))
    daku = 1;			/* Daku-ten */
  else if (*p2 == 223 && IS_HANDAKU (c))
    handaku = 1;		/* Han-daku-ten */

  *p1 = mtable[c - first][0];
  *p2 = mtable[c - first][1];

  if (daku)
    {
      /* The voiced form is the next code point in these two trail ranges. */
      if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
	(*p2)++;
      else if (*p2 == 131 || *p2 == 69)
	*p2 = 148;
    }
  else if (handaku && *p2 >= 110 && *p2 <= 122)
    (*p2) += 2;			/* the semi-voiced form is two code points on */
}

337/* Recast strcpy to handle unsigned chars used below. */
338#define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
339
340static void
341do_convert (unsigned char *to, unsigned char *from, const char *code)
342{
343#ifdef HAVE_ICONV
344 iconv_t cd;
345 size_t from_len, to_len;
346
347 if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1)
348 {
349 error ("iconv_open() error");
350 if (errno == EINVAL)
351 error ("invalid code specification: \"%s\" or \"%s\"",
352 EUCSTR, code);
353 strcpy ((char *) to, (const char *) from);
354 return;
355 }
356
357 from_len = strlen ((const char *) from) + 1;
358 to_len = BUFSIZ;
359
360 if ((int) iconv(cd, (char **) &from, &from_len, (char **) &to, &to_len) == -1)
361 {
362 if (errno == EINVAL)
363 error ("invalid end of input string");
364 else if (errno == EILSEQ)
365 error ("invalid code in input string");
366 else if (errno == E2BIG)
367 error ("output buffer overflow at do_convert()");
368 else
369 error ("something happen");
370 strcpy ((char *) to, (const char *) from);
371 return;
372 }
373
374 if (iconv_close (cd) != 0)
375 {
376 error ("iconv_close() error");
377 }
378#else
379 int p1, p2, i, j;
380 int jisx0208 = FALSE;
381 int hankaku = FALSE;
382
383 j = 0;
384 if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0)
385 {
386 for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
387 {
388 if (from[i] == ESC)
389 {
390 i++;
391 if (from[i] == '$')
392 {
393 jisx0208 = TRUE;
394 hankaku = FALSE;
395 i++;
396 }
397 else if (from[i] == '(')
398 {
399 jisx0208 = FALSE;
400 i++;
401 if (from[i] == 'I') /* Hankaku Kana */
402 hankaku = TRUE;
403 else
404 hankaku = FALSE;
405 }
406 }
407 else
408 {
409 if (jisx0208)
410 to[j++] = from[i] + 128;
411 else if (hankaku)
412 {
413 to[j++] = SS2;
414 to[j++] = from[i] + 128;
415 }
416 else
417 to[j++] = from[i];
418 }
419 }
420 }
421 else if (strcmp (code, SJISSTR) == 0)
422 {
423 for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
424 {
425 p1 = from[i];
426 if (p1 < 127)
427 to[j++] = p1;
428 else if ((p1 >= 161) && (p1 <= 223))
429 { /* Hankaku Kana */
430 to[j++] = SS2;
431 to[j++] = p1;
432 }
433 else
434 {
435 p2 = from[++i];
436 SJIStoJIS (&p1, &p2);
437 to[j++] = p1 + 128;
438 to[j++] = p2 + 128;
439 }
440 }
441 }
442 else
443 {
444 error ("invalid code specification: \"%s\"", code);
445 return;
446 }
447
448 if (j >= BUFSIZ)
449 {
450 error ("output buffer overflow at do_convert()");
451 ustrcpy (to, from);
452 }
453 else
454 to[j] = '\0';
455#endif /* HAVE_ICONV */
456}
457
458static int
459do_check_and_conv (unsigned char *to, unsigned char *from)
460{
461 static unsigned char tmp[BUFSIZ];
462 int p1, p2, i, j;
463 int kanji = TRUE;
464
465 switch (DetectKanjiCode (from))
466 {
467 case NEW:
468 debug ("Kanji code is New JIS.");
469 do_convert (tmp, from, NEWJISSTR);
470 break;
471 case OLD:
472 debug ("Kanji code is Old JIS.");
473 do_convert (tmp, from, OLDJISSTR);
474 break;
475 case ESCI:
476 debug ("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
477 do_convert (tmp, from, NEWJISSTR);
478 break;
479 case NEC:
480 debug ("Kanji code is NEC Kanji.");
481 error ("cannot convert NEC Kanji.");
482 ustrcpy (tmp, from);
483 kanji = FALSE;
484 break;
485 case EUC:
486 debug ("Kanji code is EUC.");
487 ustrcpy (tmp, from);
488 break;
489 case SJIS:
490 debug ("Kanji code is SJIS.");
491 do_convert (tmp, from, SJISSTR);
492 break;
493 case EUCORSJIS:
494 debug ("Kanji code is EUC or SJIS.");
495 ustrcpy (tmp, from);
496 kanji = FALSE;
497 break;
498 case ASCII:
499 debug ("This is ASCII string.");
500 ustrcpy (tmp, from);
501 kanji = FALSE;
502 break;
503 default:
504 debug ("This string includes unknown code.");
505 ustrcpy (tmp, from);
506 kanji = FALSE;
507 break;
508 }
509
510 /* Hankaku Kana ---> Zenkaku Kana */
511 if (kanji)
512 {
513 j = 0;
514 for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++)
515 {
516 if (tmp[i] == SS2)
517 {
518 p1 = tmp[++i];
519 if (tmp[i + 1] == SS2)
520 {
521 p2 = tmp[i + 2];
522 if (p2 == 222 || p2 == 223)
523 i += 2;
524 else
525 p2 = 0;
526 }
527 else
528 p2 = 0;
529 han2zen (&p1, &p2);
530 SJIStoJIS (&p1, &p2);
531 to[j++] = p1 + 128;
532 to[j++] = p2 + 128;
533 }
534 else
535 to[j++] = tmp[i];
536 }
537
538 if (j >= BUFSIZ)
539 {
540 error ("output buffer overflow at Hankaku --> Zenkaku");
541 ustrcpy (to, tmp);
542 }
543 else
544 to[j] = '\0';
545 }
546 else
547 ustrcpy (to, tmp);
548
549 return kanji;
550}
551
/*
 * any2eucjp() - convert a Japanese string of any supported encoding to
 * EUC-JP.
 *
 *   dest      output buffer
 *   src       NUL-terminated input; must be shorter than BUFSIZ bytes
 *   dest_max  size of `dest` in bytes; must not exceed BUFSIZ
 *
 * Returns the value of do_check_and_conv() (TRUE if the input was treated
 * as Japanese text and converted, FALSE if it was copied unchanged), or -1
 * after raising a warning when:
 *   - src is too long or dest_max is too large (dest is left untouched);
 *   - the converted text does not fit in dest_max bytes; in that case as
 *     much of the *unconverted* src as fits is stored in dest, always
 *     NUL-terminated, and never more than dest_max bytes are written.
 *
 * Uses a function-static scratch buffer.
 */
int
any2eucjp (unsigned char *dest, unsigned char *src, unsigned int dest_max)
{
  static unsigned char tmp_dest[BUFSIZ];
  int ret;

  if (strlen ((const char *) src) >= BUFSIZ)
    {
      error ("input string too large");
      return -1;
    }
  if (dest_max > BUFSIZ)
    {
      error ("invalid maximum size of destination\nit should be less than %d.", BUFSIZ);
      return -1;
    }
  ret = do_check_and_conv (tmp_dest, src);
  if (strlen ((const char *) tmp_dest) >= dest_max)
    {
      error ("output buffer overflow");
      /* Fall back to the raw input, but src may itself be longer than the
         caller's buffer, so bound the copy by dest_max. */
      if (dest_max > 0)
	{
	  size_t n = strlen ((const char *) src);

	  if (n >= dest_max)
	    n = dest_max - 1;
	  memcpy (dest, src, n);
	  dest[n] = '\0';
	}
      return -1;
    }
  ustrcpy (dest, tmp_dest);
  return ret;
}

#if 0
/*
 * Disabled helpers kept for reference (this whole region is compiled out).
 *
 * strwidth() returns the byte length of `s` after conversion to EUC-JP,
 * or 0 if the scratch buffer cannot be allocated.
 */
unsigned int
strwidth (unsigned char *s)
{
  unsigned char *t;
  unsigned int i;

  t = (unsigned char *) gdMalloc (BUFSIZ);
  if (t == NULL)
    return 0;
  t[0] = '\0';			/* any2eucjp() may fail before writing t */
  any2eucjp (t, s, BUFSIZ);
  i = (unsigned int) strlen ((const char *) t);
  gdFree (t);
  return i;
}

#ifdef DEBUG
/* Manual test driver: reads one line from stdin and prints it converted. */
int
main (void)
{
  unsigned char input[BUFSIZ];
  unsigned char *output;
  unsigned char *str;
  int c, i = 0;

  /* Leave room for the terminator and stop on end-of-file as well. */
  while (i < BUFSIZ - 1 && (c = fgetc (stdin)) != EOF && c != '\n')
    input[i++] = (unsigned char) c;
  input[i] = '\0';

  printf ("input : %u bytes\n", (unsigned int) strlen ((const char *) input));
  printf ("output: %u bytes\n", strwidth (input));

  output = (unsigned char *) gdMalloc (BUFSIZ);
  if (output == NULL)
    return 1;
  output[0] = '\0';		/* any2eucjp() may fail before writing output */
  any2eucjp (output, input, BUFSIZ);
  str = output;
  while (*str != '\0')
    putchar (*(str++));
  putchar ('\n');
  gdFree (output);

  return 0;
}
#endif
#endif
/*
 * Cross-reference index carried over from the generated documentation page
 * (symbol, then where it is defined).  Not part of the program.
 *
fprintf($stream, string $format, mixed ... $values)
getenv(?string $name=null, bool $local_only=false)
fgetc($stream)
printf(string $format, mixed ... $values)
vfprintf($stream, string $format, array $values)
char s[4]
Definition cdf.c:77
error($message)
Definition ext_skel.php:22
#define TRUE
Definition gd_gd.c:7
#define FALSE
Definition gd_gd.c:8
#define NULL
Definition gdcache.h:45
int main(void)
Definition gddemo.c:7
#define gdFree(ptr)
Definition gdhelpers.h:19
#define gdMalloc(size)
Definition gdhelpers.h:16
#define SJIS
Definition gdkanji.c:44
#define OLD
Definition gdkanji.c:40
#define IS_DAKU(c)
Definition gdkanji.c:242
#define IS_HANDAKU(c)
Definition gdkanji.c:243
#define ESC
Definition gdkanji.c:53
#define SJISSTR
Definition gdkanji.c:51
#define SS2
Definition gdkanji.c:54
int any2eucjp(unsigned char *dest, unsigned char *src, unsigned int dest_max)
Definition gdkanji.c:553
#define ASCII
Definition gdkanji.c:46
#define OLDJISSTR
Definition gdkanji.c:49
#define EUCSTR
Definition gdkanji.c:50
#define LIBNAME
Definition gdkanji.c:21
#define NEC
Definition gdkanji.c:42
#define EUC
Definition gdkanji.c:43
#define NEW
Definition gdkanji.c:39
#define ESCI
Definition gdkanji.c:41
#define NEWJISSTR
Definition gdkanji.c:48
#define ustrcpy(A, B)
Definition gdkanji.c:338
#define EUCORSJIS
Definition gdkanji.c:45
again j
iconv(string $from_encoding, string $to_encoding, string $string)
PHPAPI ZEND_COLD void php_error_docref(const char *docref, int type, const char *format,...)
Definition main.c:1173
char * debug
Definition mysqlnd.h:298
#define vspprintf
Definition spprintf.h:31
#define errno
#define efree(ptr)
Definition zend_alloc.h:155
strlen(string $string)
strncmp(string $string1, string $string2, int $length)
strcmp(string $string1, string $string2)
zval * args
#define E_WARNING
Definition zend_errors.h:24
zval * ret
 */