An example of iconv

This is a simple example in C of the "iconv" library. This example converts a string in Japanese EUC encoding into UTF-8 (8-bit Unicode Transformation Format) encoding. It demonstrates how iconv works by printing out the bytes of the strings, in hexadecimal, before and after the conversion.

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <iconv.h>
#include <errno.h>
#include <string.h>

/* Write the label "what" followed by ": ", then the first "len"
   bytes of "a" as two-digit upper-case hexadecimal numbers separated
   by single spaces, then a newline, to standard output. */

void
showhex (const char * what, const char * a, int len)
{
    const char * separator = "";
    int n;

    printf ("%s: ", what);
    for (n = 0; n < len; n++) {
	/* Go via unsigned char so that bytes above 0x7F are not
	   sign-extended when they are promoted for printf. */
	printf ("%s%02X", separator, (unsigned char) a[n]);
	/* Every byte after the first is preceded by a space. */
	separator = " ";
    }
    printf ("\n");
}

/* Print the heading "before_after" on a line of its own, followed by
   a hex dump of "len_start" bytes of the EUC-JP input and a hex dump
   of "utf8len_start" bytes of the UTF-8 output buffer. This exists
   only to show what the conversion is doing. */

void
show_values (const char * before_after,
	     const char * euc_start,   int len_start,
	     const char * utf8_start,  int utf8len_start)
{
    /* One entry per buffer to dump, in the order they are printed. */
    const struct {
	const char * label;
	const char * bytes;
	int n_bytes;
    } dumps[] = {
	{ "EUC-JP string", euc_start, len_start },
	{ "UTF-8 string", utf8_start, utf8len_start },
    };
    size_t d;

    printf ("%s:\n", before_after);
    for (d = 0; d < sizeof dumps / sizeof dumps[0]; d++) {
	showhex (dumps[d].label, dumps[d].bytes, dumps[d].n_bytes);
    }
}

/* Encoding names, as passed to iconv_open. */

/* The encoding to convert into. */
const char * OUTSET = "UTF-8";
/* The encoding of the input text. */
const char * EUCSET = "EUC-JP";

/* Open an iconv conversion descriptor which converts text from the
   EUCSET encoding into the OUTSET encoding.

   Returns the open descriptor. If the descriptor cannot be opened, a
   message is printed to standard error and the program exits with
   status 1, so a failed descriptor is never returned. */

iconv_t
initialize (void)
{
    iconv_t conv_desc;

    conv_desc = iconv_open (OUTSET, EUCSET);
    /* iconv_open reports failure by returning (iconv_t) -1. Compare
       in the descriptor's own type: iconv_t may be a pointer type,
       and casting a pointer to int can discard its upper bits. */
    if (conv_desc == (iconv_t) -1) {
	/* Copy errno before calling anything else which may change
	   it. */
	int saved_errno = errno;

	/* Initialization failure. */
	if (saved_errno == EINVAL) {
	    fprintf (stderr,
		     "Conversion from '%s' to '%s' is not supported.\n",
		     EUCSET, OUTSET);
	}
	else {
	    fprintf (stderr, "Initialization failure: %s\n",
		     strerror (saved_errno));
	}
	// exit ok
	exit (1);
    }
    return conv_desc;
}


/* Convert the nul-terminated EUC-JP string "euc" into UTF-8 using
   the conversion descriptor "conv_desc".

   As a demonstration, hex dumps of the input and of the output
   buffer are printed to standard output before and after the
   conversion.

   Returns a newly-allocated, nul-terminated string which the caller
   must release with "free". Returns NULL, after printing a message
   to standard error, if "euc" is empty or if memory cannot be
   allocated. If iconv itself fails, a diagnostic is printed to
   standard error and the program exits with status 1. */

char *
euc2utf8 (iconv_t conv_desc, const char * euc)
{
    size_t iconv_value;
    char * utf8;
    /* iconv takes pointers to size_t for both byte counts, so these
       must be exactly size_t and not a narrower integer type. */
    size_t len;
    size_t utf8len;
    /* iconv takes its input as "char **" although it only reads the
       input, so it is given this non-const cursor, which it advances
       past the bytes it has consumed. */
    char * in;
    /* The variables with "start" in their name are solely for display
       of what the function is doing. As iconv runs, it alters the
       values of the variables, so these are for keeping track of the
       start points and start lengths. */
    char * utf8start;
    const char * euc_start;
    int len_start;
    int utf8len_start;

    len = strlen (euc);
    if (len == 0) {
	fprintf (stderr, "Input string is empty.\n");
	return NULL;
    }
    /* Allocate zero-filled space for the UTF-8: two bytes for each
       input byte, plus two spare bytes. calloc checks the
       multiplication for overflow. */
    utf8 = calloc (len + 1, 2);
    if (! utf8) {
	fprintf (stderr, "Calloc failed.\n");
	return NULL;
    }
    /* Offer iconv two output bytes per input byte. The spare bytes
       beyond that are never written by iconv, so the result is
       always nul-terminated. */
    utf8len = 2 * len;
    in = (char *) euc;
    /* Keep track of the variables. */
    len_start = (int) len;
    utf8len_start = (int) utf8len;
    utf8start = utf8;
    euc_start = euc;
    /* Display what is in the variables before calling iconv. */
    show_values ("before",
		 euc_start, len_start,
		 utf8start, utf8len_start);
    iconv_value = iconv (conv_desc, & in, & len, & utf8, & utf8len);
    /* Handle failures. */
    if (iconv_value == (size_t) -1) {
	/* Copy errno before fprintf has a chance to change it. */
	int saved_errno = errno;

	/* "in" and "len" now describe the input which was not
	   converted, and "utf8len" the unused output space. */
	fprintf (stderr, "iconv failed: in string '%s', length %zu, "
		"out string '%s', length %zu\n",
		 in, len, utf8start, utf8len);
	switch (saved_errno) {
	    /* See "man 3 iconv" for an explanation. */
	case EILSEQ:
	    fprintf (stderr, "Invalid multibyte sequence.\n");
	    break;
	case EINVAL:
	    fprintf (stderr, "Incomplete multibyte sequence.\n");
	    break;
	case E2BIG:
	    fprintf (stderr, "No more room.\n");
	    break;
	default:
	    fprintf (stderr, "Error: %s.\n", strerror (saved_errno));
	}
	// exit ok
	exit (1);
    }
    /* Display what is in the variables after calling iconv. */
    show_values ("after",
		 euc_start, len_start,
		 utf8start, utf8len_start);
    return utf8start;
}

/* Release the conversion descriptor "conv_desc" with iconv_close. If
   iconv_close reports a failure, print the reason to standard error
   and exit with status 1. */

void
finalize (iconv_t conv_desc)
{
    if (iconv_close (conv_desc) == 0) {
	/* Closed successfully, nothing more to do. */
	return;
    }
    fprintf (stderr, "iconv_close failed: %s\n", strerror (errno));
    // exit ok
    exit (1);
}

/* Convert a sample EUC-JP string into UTF-8 and print the result. */

int main (void)
{
    /* The sample text as EUC-JP bytes. Every byte is written as a hex
       escape so that the string does not depend on the encoding in
       which this source file is saved. */
    const char * in_string = "\xB6\xE2\xCA\xB8\xC2\xCE";
    char * out_string;
    /* Conversion descriptor. */
    iconv_t conv_desc;

    conv_desc = initialize ();
    out_string = euc2utf8 (conv_desc, in_string);
    finalize (conv_desc);

    /* euc2utf8 returns a null pointer if it could not produce a
       string; otherwise the string is ours to free. */
    if (out_string) {
	printf ("Final iconv output: %s\n", out_string);
	free (out_string);
    }
    return 0;
}

(download)

Note: save the C file as 'euc-jp' (Japanese EUC encoding) in GNU Emacs using "set-buffer-file-coding-system RET euc-jp RET".

To compile this on FreeBSD use

cc -g -o iconv-example -I/usr/local/include -L/usr/local/lib iconv-example.c -liconv

This may produce some compiler warnings due to non-UTF-8 characters.

On older systems, you also need to install the iconv library, which is found in /usr/ports/converters/libiconv/:

cd /usr/ports/converters/libiconv/
make install clean

Newer versions of FreeBSD already have iconv installed, so this step is not needed there.

The output of the example program above looks like this, in UTF-8 encoding:

before:
EUC-JP string: B6 E2 CA B8 C2 CE
UTF-8 string: 00 00 00 00 00 00 00 00 00 00 00 00
after:
EUC-JP string: B6 E2 CA B8 C2 CE
UTF-8 string: E9 87 91 E6 96 87 E4 BD 93 00 00 00
Final iconv output: 金文体

This is a relatively simple example, so it does not deal with the boundary issues which arise when conversion halts partway through the bytes of a multibyte character.


Copyright © Ben Bullock 2009-2024. All rights reserved. For comments, questions, and corrections, please email Ben Bullock (benkasminbullock@gmail.com). / Privacy / Disclaimer