#!/usr/bin/perl
use warnings;
use strict;
use HTML::TreeBuilder;

# Dump the element tree of an HTML file to standard output, one node per
# line, indented by one space per level of nesting.

# This is the file we are going to read. A path given as the first
# command-line argument overrides the default 'test.html'.
my $file = @ARGV ? $ARGV[0] : 'test.html';

# Parse all of the contents of $file. parse_file returns undef (with the
# reason in $!) when the file cannot be opened, so stop here instead of
# walking a tree that was never filled in.
my $parser = HTML::TreeBuilder->new ();
defined $parser->parse_file ($file)
    or die "Cannot parse '$file': $!\n";

# Now display the contents of $parser.
recurse ($parser, 0);

# Release the tree explicitly, as the HTML::TreeBuilder documentation
# recommends once a tree is no longer needed.
$parser->delete ();

exit;

# This displays the contents of $node and any children it may have.
#
# Arguments:
#   $node  - either an HTML::Element (any reference is treated as one)
#            or a plain string of text taken from the HTML file.
#   $depth - the indentation used: the number of spaces printed before
#            this node. Children are printed at $depth + 1.
#
# Output goes to the currently selected filehandle. Returns nothing.

sub recurse
{
    my ($node, $depth) = @_;

    # Print indentation according to the level of recursion.
    print " " x $depth;

    # If $node is not a reference, then it is just a piece of text
    # from the HTML file: print it and stop, since text has no children.
    if (! ref $node) {
        print $node, "\n";
        return;
    }

    # Otherwise $node is an HTML::Element. Print the tag associated
    # with it, for example "html" or "li".
    print $node->tag (), "\n";

    # $node->content_list () returns a list of the child nodes of
    # $node; display each of them one level deeper.
    for my $child_node ($node->content_list ()) {
        recurse ($child_node, $depth + 1);
    }
    return;
}