#!/usr/bin/perl

# Example code from Chapter 6 of /Perl and LWP/ by Sean M. Burke
# http://www.oreilly.com/catalog/perllwp/
# sburke@cpan.org

require 5;
use strict;
use warnings;

# diary-link-checker -- check links from diary page

# Doesn't actually do anything, because the input is contrived
#  and on a nonexistent host.

use LWP;

# Page whose links are to be checked.  It is reassigned after the fetch
# so that relative links can be resolved against the right base.
my $doc_url = "http://chichi.diaries.int/stuff/diary.html";

# File-scoped state used by the code and subs below: the text of the
# fetched page, and the user agent that init_browser() stores in $browser.
my($document, $browser);
init_browser();

{  # Get the page whose links we want to check:
  # Fetch the document; on any non-success status, abort with the
  # server's status line so the failure is visible to the user.
  my $response = $browser->get($doc_url);
  die "Couldn't get $doc_url: ", $response->status_line
    unless $response->is_success;
  $document = $response->content;

  # In case we need to resolve relative URLs later, remember the base URL
  # of what we actually received.  base() is a method of the response
  # object (not of the request): it takes a base declared by the document
  # or by the response headers into account, and otherwise falls back to
  # the URL that was requested.
  $doc_url = $response->base;
}

# Scan the page for every double-quoted href="..." attribute (matched
# case-insensitively), turn each captured value into an absolute URL
# relative to $doc_url, and hand it to check_url().
while ($document =~ m/href\s*=\s*"([^"\s]+)"/gi) {
  my $raw_link = $1;   # copy the capture before anything else can reset $1
  my $target   = absolutize($raw_link, $doc_url);
  check_url($target);
}

# absolutize($url, $base)
#   Resolves $url (possibly relative) against $base and returns the
#   result as a canonicalized URI object.
sub absolutize {
  use URI;   # loaded at compile time, even though it is written inside the sub
  my $url  = shift;
  my $base = shift;
  my $resolved = URI->new_abs($url, $base);
  return $resolved->canonical;
}

# init_browser()
#   Creates a new LWP::UserAgent, stores it in the file-scoped $browser
#   variable, and returns it.
sub init_browser {
  my $agent = LWP::UserAgent->new;
  # ...And any other initialization we might need to do...
  $browser = $agent;
  return $browser;
}

# check_url($url)
#   A temporary placeholder: only reports, on standard output, the URL
#   that it was asked to check.
sub check_url {
  my ($url) = @_;
  print "I should check $url\n";
}
__END__
