#!/home/ben/software/install/bin/perl # Test various file-to-mime programs on CPAN. use warnings; use strict; use utf8; use FindBin '$Bin'; use Unicode::UTF8 'valid_utf8'; use File::Slurper 'read_binary'; use Table::Readable 'read_table'; use HTML::Make; use Getopt::Long; use List::UtilsBy 'nsort_by'; # Candidate modules use File::LibMagic; use File::MMagic; use File::MMagic::XS ':compat'; use File::MimeInfo; use File::Type; use File::Type::WebImages (); use File::LibMagic::FFI; use Media::Type::Simple; use MIME::Types; use MIME::Type::FileName; GetOptions ( html => \my $html, mimedetect => \my $mimedetect, ); if ($mimedetect) { eval "use MIME::Detect;"; } my $flm = File::LibMagic->new (); my $mm = new File::MMagic; my $mmx = File::MMagic::XS->new (); my $ft = File::Type->new(); my $mt = MIME::Types->new(); my $mime; if ($mimedetect) { $mime = MIME::Detect->new(); } my $magic = File::LibMagic::FFI->new (); my @files = read_table ("$Bin/good-bad.txt"); my @fresults; # Number of the mime types for each module/method which are correct. my %score; # Total of known mime types my $total; for my $entry (@files) { my %results; $results{desc} = $entry->{desc}; my $tfile = $entry->{file}; my $i = $flm->info_from_filename ($tfile); $results{'File::LibMagic mime type'} = $i->{mime_type}; $results{'File::LibMagic encoding'} = $i->{encoding}; my $res = $mm->checktype_filename ($tfile); $results{'File::MMagic'} = $res; my $resxs = $mmx->checktype_filename ($tfile); $results{'File::MMagic::XS'} = $resxs; my $type_from_data = $ft->checktype_filename($tfile); $results{'File::Type'} = $type_from_data; my $mime_type = mimetype($tfile); $results{'File::MimeInfo'} = $mime_type; my $ffi = $magic->checktype_filename($tfile); $ffi =~ s/;.*$//; $results{'File::LibMagic::FFI'} = $ffi; my $ext = $tfile; if ($ext =~ s!^.*\.!!) { # Media::Type::Simple throws an exception when given a header # it doesn't know about. my $media_type; eval { $media_type = type_from_ext ($ext); }; if ($@ && $@ =~ /Unknown extension/) { $media_type = 'Unknown extension'; } $results{'Media::Type::Simple'} = $media_type; } else { $results{'Media::Type::Simple'} = "no extension"; } my $mt_type = $mt->mimeTypeOf($tfile); if (! defined $mt_type) { $mt_type = 'unknown'; } $results{'MIME::Types'} = $mt_type; if ($mimedetect) { my @types = $mime->mime_types($tfile); $results{"MIME::Detect: from file"} = join '; ', (map {$_->mime_type} @types); my @ntypes = $mime->mime_types_from_name ($tfile); $results{"MIME::Detect: from name"} = join '; ', (map {$_->mime_type} @ntypes); } my $ftw = File::Type::WebImages::mime_type ($tfile); if (! defined ($ftw)) { $ftw = 'undefined'; } $results{'File::Type::WebImages'} = $ftw; $results{'MIME::Type::FileName'} = MIME::Type::FileName::guess ($tfile); my $bytes = read_binary ($tfile); # This gives exactly the same results. #my $flmsi = $flm->info_from_string ($bytes); # $results{'File::LibMagic from string'} = $flmsi->{mime_type}; my $valid_utf8 = valid_utf8 ($bytes); $results{'Valid-UTF-8'} = $valid_utf8 ? 'Yes' : 'No'; my $ok = $entry->{ok}; if ($ok) { for my $k (keys %results) { if ($k =~ /encoding/) { next; } my $v = $results{$k}; if ($v =~ /\b\Q$ok\E\b/) { $score{$k}++; } } $results{ok} = $entry->{ok}; $total++; } # my $file = `file $tfile`; # chomp $file; # print "file: $file\n"; # print "\n"; push @fresults, \%results; } my @scorder = keys %score; @scorder = reverse (nsort_by {$score{$_}} @scorder); if ($html) { # HTML output my $div = HTML::Make->new ('div'); for my $entry (@fresults) { my $desc = ucfirst ($entry->{desc}); if ($entry->{ok}) { $desc .= " (\"$entry->{ok}\")"; } $div->push ('h3', text => $desc); my $table = $div->push ('table'); for my $k (sort keys %$entry) { if ($k =~ /^(desc|ok)$/) { next; } my $v = $entry->{$k}; if (! defined $v) { $v = 'undefined'; } my $tr = $table->push ('tr'); my $ok = $entry->{ok}; if ($ok && $v =~ /\b\Q$ok\E\b/) { $tr->add_attr (style => 'background:skyblue;'); } $tr->add_text ("$k $v\n"); } } $div->push ('h2', text => 'Scores', attr => {id => 'scores'}); my $table = $div->push ('table'); for my $k (@scorder) { $table->add_text ("$k$score{$k}/$total\n"); } print $div->text (); } else { # Text output for my $entry (@fresults) { print "File type: $entry->{desc}"; if ($entry->{ok}) { print " (expect mime type \"$entry->{ok}\")"; } print "\n\n"; for my $k (sort keys %$entry) { print "$k: $entry->{$k}\n" unless $k =~ /^(desc|ok)$/; } print "\n"; } print "\nScores:\n\n"; for my $k (@scorder) { print "$k: $score{$k} / $total\n"; } } exit;