#!/usr/bin/perl

# Example code from Chapter 11 of /Perl and LWP/ by Sean M. Burke
# http://www.oreilly.com/catalog/perllwp/
# sburke@cpan.org

require 5;
use strict;
use warnings;

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# !!!                                              !!!
# !!!   To be run on the first of every month...   !!!
# !!!                                              !!!
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

use POSIX ('strftime');
my $last_month = strftime("y%Y-m%m", localtime(time - 24 * 60 * 60));
# Since today is the first, one day ago (24*60*60 seconds) is in
#  last month.
my $url = "http://www.unicode.org/mail-arch/unicode-ml/$last_month/";

use LWP;
my $browser = LWP::UserAgent->new;
$browser->credentials(
  'www.unicode.org:80',  # Don't forget the ":80"!
  # This is no secret...
  'Unicode-MailList-Archives',
  'unicode-ml' => 'unicode'
);
print "Getting topics for last month, $last_month\n",
      " from $url\n";
my $response = $browser->get($url);
die "Error getting $url: ", $response->status_line
  if $response->is_error;

my(%posts, %first_url);
while( ${ $response->content_ref }
 =~ m{<li><a href="(\d+.html)"><strong>(.*?)</strong>}g
   # Like: <li><a href="0127.html"><strong>Klingon</strong>
) {
  my($url, $topic) = ($1,$2);

  # Strip any number of "Re:" prefixes.
  while( $topic =~ s/^Re:\s+//i ) {}

  ++$posts{$topic};
  use URI;   # For absolutizing URLs...
  $first_url{$topic} ||= URI->new_abs($url, $response->base);
}

print "Topics:\n", reverse sort map   # Most common first:
  sprintf("% 5s %s\n       %s\n",
          $posts{$_}, $_, $first_url{$_}
  ), keys %posts;

__END__

