This simple routine takes a Unicode code point in the variable ucs2
and converts it to the UTF-8 encoding. The output buffer, utf8, is
assumed to have at least four bytes of space allocated to write into:
up to three encoded bytes plus a terminating zero byte. The return value is
the number of encoded bytes written, not counting the terminating zero byte, or -1 if the code point cannot be converted.
This works as a conversion from UCS-2 or UTF-16 for most code points, but does not handle the surrogate code points U+D800 to U+DFFF or code points above U+FFFF.
/*
 * Encode one Basic Multilingual Plane code point as UTF-8.
 *
 * ucs2: the code point to encode. Valid inputs are 0..0xFFFF excluding the
 *       surrogate range 0xD800..0xDFFF.
 * utf8: output buffer with room for at least four bytes (at most three
 *       encoded bytes plus the terminating zero byte).
 *
 * Returns the number of encoded bytes written (1, 2 or 3), not counting the
 * terminating zero byte that is always appended on success. Returns -1 and
 * leaves the buffer untouched when the code point is negative, is a
 * surrogate, or lies above 0xFFFF.
 */
int ucs2_to_utf8 (int ucs2, unsigned char * utf8)
{
    /* Negative values are not code points; without this check they would be
       truncated into a single garbage byte by the one-byte branch. */
    if (ucs2 < 0) {
        return -1;
    }
    if (ucs2 < 0x80) {
        /* 0xxxxxxx */
        utf8[0] = (unsigned char) ucs2;
        utf8[1] = '\0';
        return 1;
    }
    if (ucs2 < 0x800) {
        /* 110xxxxx 10xxxxxx */
        utf8[0] = (unsigned char) ((ucs2 >> 6) | 0xC0);
        utf8[1] = (unsigned char) ((ucs2 & 0x3F) | 0x80);
        utf8[2] = '\0';
        return 2;
    }
    /* Surrogates are UTF-16 code units, not characters; encoding them
       individually would produce ill-formed UTF-8. */
    if (ucs2 >= 0xD800 && ucs2 <= 0xDFFF) {
        return -1;
    }
    if (ucs2 <= 0xFFFF) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        utf8[0] = (unsigned char) ((ucs2 >> 12) | 0xE0);
        utf8[1] = (unsigned char) (((ucs2 >> 6) & 0x3F) | 0x80);
        utf8[2] = (unsigned char) ((ucs2 & 0x3F) | 0x80);
        utf8[3] = '\0';
        return 3;
    }
    /* Code points above 0xFFFF need four encoded bytes plus the terminator,
       which does not fit the documented four-byte buffer. */
    return -1;
}