#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Copy a UTF-8 string, rewriting its katakana as hiragana.
 *
 * The katakana block is encoded as E3 82 A0 .. E3 83 BF.  Within it, the
 * sequences E3 82 A1 .. E3 83 B6 and the two iteration marks E3 83 BD and
 * E3 83 BE are rewritten to the hiragana sequence 0x60 code points lower.
 * Every other byte is copied through unchanged.
 *
 * katakana      NUL-terminated input string; it is never modified.
 * hiragana_ptr  Receives a newly allocated, NUL-terminated copy of the
 *               input with the conversion applied.  The caller owns the
 *               buffer and must release it with free().  It is set to
 *               NULL when the function fails.
 *
 * Returns 0 on success, or -1 if either argument is NULL or the copy
 * cannot be allocated.
 */
int kata2hira(const unsigned char *katakana, unsigned char **hiragana_ptr)
{
    unsigned char *h;
    unsigned char *hiragana;
    size_t size;

    if (hiragana_ptr == NULL) {
        return -1;
    }
    *hiragana_ptr = NULL;
    if (katakana == NULL) {
        return -1;
    }

    /* malloc + memcpy rather than strdup: strdup is not ISO C before C23. */
    size = strlen((const char *)katakana) + 1;
    hiragana = malloc(size);
    if (hiragana == NULL) {
        return -1;
    }
    memcpy(hiragana, katakana, size);

    h = hiragana;
    while (*h) {
        /*
         * Only look at complete three-byte sequences in the katakana block.
         * Requiring the third byte to be a continuation byte (0x80..0xBF)
         * keeps truncated or malformed sequences from being rewritten.
         * Short-circuit evaluation guarantees h[2] is read only when h[1]
         * is non-zero, so the terminator is never overrun.
         */
        if (h[0] == 0xe3 && (h[1] == 0x82 || h[1] == 0x83)
            && h[2] >= 0x80 && h[2] <= 0xbf) {
            /* Is this one of the katakana that has a hiragana twin? */
            if ((h[1] == 0x82 && h[2] >= 0xa1)
                || (h[1] == 0x83 && h[2] <= 0xb6)
                || (h[1] == 0x83 && (h[2] == 0xbd || h[2] == 0xbe))) {
                /*
                 * Subtract 0x60 from the code point, expressed on the
                 * UTF-8 bytes.  The third byte holds six payload bits, so
                 * lowering the second byte by one is worth 0x40:
                 *   third byte >= 0xA0:  -0x40 - 0x20 = -0x60
                 *   third byte <  0xA0:  -0x80 + 0x20 = -0x60
                 */
                if (h[2] >= 0xa0) {
                    h[1] = (unsigned char)(h[1] - 1);
                    h[2] = (unsigned char)(h[2] - 0x20);
                } else {
                    h[1] = (unsigned char)(h[1] - 2);
                    h[2] = (unsigned char)(h[2] + 0x20);
                }
            }
            h += 3;
        } else {
            h++;
        }
    }

    *hiragana_ptr = hiragana;
    return 0;
}

/*
 * Run kata2hira over a fixed set of sample strings and print each input
 * ("K:") followed by its converted form ("H:").  Exits with EXIT_FAILURE
 * if any conversion fails.
 */
int main(void)
{
    int i;
    int status = EXIT_SUCCESS;
    const char *tests[] = {
        "サク:k] 3211 [ノミ:h] 453 [ウガツ:h] ",
        "钁: 886 [カク:k] 1210 [キャク:k] ",
        "鸚: 437 [イン:k] 644 [オウ:k] 4403 [ヨウ:k] ",
        "鸛: 1101 [カン:k] 1500 [コウノトリ:h] ",
        "鬱: 514 [ウツ:k] 512 [ウッスル:h] 3613 [フサグ:h] 1853 [シゲル:h] ",
        "爨: 1791 [サン:k] 922 [カシグ:h] 1035 [カマド:h] ",
        /* First row of unicode katakana. */
        "゠ァアィイゥウェエォオカガキギク",
        /* Last row of unicode katakana. */
        "ヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ",
    };
    int n_tests = (int)(sizeof(tests) / sizeof(tests[0]));

    for (i = 0; i < n_tests; i++) {
        unsigned char *hiragana = NULL;

        if (kata2hira((const unsigned char *)tests[i], &hiragana) != 0) {
            fprintf(stderr, "kata2hira failed on test %d\n", i);
            status = EXIT_FAILURE;
            continue;
        }
        printf("K: %s\nH: %s\n", tests[i], (const char *)hiragana);
        free(hiragana);
    }
    return status;
}