How to decode broken doubly-encoded UTF-8 in Perl

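This example program shows how to repair doubly-encoded UTF-8: text whose UTF-8 bytes were each mistaken for a separate character and then encoded as UTF-8 a second time, so that every byte of the original encoding has turned into its own Unicode character.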

use warnings;
use strict;
use utf8;
use FindBin '$Bin';
use Unicode::UTF8 'decode_utf8';
# Encode everything printed to STDOUT as (strict) UTF-8.
binmode STDOUT, ":encoding(UTF-8)";
# A doubly-encoded sample title. Each apostrophe (U+2019, UTF-8 bytes
# E2 80 99) appears as the three characters U+00E2 U+0080 U+0099. They
# are written as escapes because two of them are invisible control
# characters, which are easily lost or "corrected" when the text is
# copied around.
my $broken = "From the female squash champion fleeing the Taliban, to the gay popstar who became a hit back in macho Brazil, it\x{e2}\x{80}\x{99}s often Canada\x{e2}\x{80}\x{99}s newcomers who can tell its most dramatic stories";
my $ok = fix_title ($broken);
print "$ok\n";

# Repair a doubly-encoded UTF-8 string.
#
# Takes a string in which every character is expected to stand for one
# byte of a UTF-8 encoding (code points 0-255), turns those characters
# back into bytes, and decodes the bytes with decode_utf8.
#
# Returns the decoded string. If any character has a code point of 256
# or more, the string cannot be a byte-per-character encoding, so a
# warning is issued and the input is returned unchanged.
sub fix_title
{
    my ($title) = @_;
    my $bytes = '';
    for my $char (split //, $title) {
        my $o = ord ($char);
        if ($o >= 256) {
            # Not representable as a single byte: give up on the whole
            # string rather than produce a half-converted result.
            warn "large character $o in $title";
            return $title;
        }
        # "C" packs the code point as one unsigned byte.
        $bytes .= pack ("C", $o);
    }
    return decode_utf8 ($bytes);
}


Copyright © Ben Bullock 2009-2024. All rights reserved. For comments, questions, and corrections, please email Ben Bullock or use the discussion group at Google Groups. / Privacy / Disclaimer