#!/usr/bin/perl

# Example code from Chapter 8 of /Perl and LWP/ by Sean M. Burke
# http://www.oreilly.com/catalog/perllwp/
# sburke@cpan.org

require 5;
use warnings;
use strict;
use constant DEBUG => 0;
use HTML::TokeParser;

# Offline test harness (disabled): parse a locally saved copy of one listing
# page instead of fetching anything.  The second argument is the base URL
# that parse_fresh_stream() uses to resolve relative links.
#parse_fresh_stream(
#  HTML::TokeParser->new('fresh1.html') || die($!),
#  'http://freshair.npr.org/dayFA.cfm?todayDate=07%2F02%2F2001'
#);

# Normal entry point: fetch and parse the listing page for every weekday
# of last month.
scan_last_month();

# Fetch the Fresh Air day-listing page for each weekday of last month and
# pass every successfully fetched page to parse_fresh_stream().
#
# Takes no arguments.  A failed fetch is reported on STDOUT and skipped;
# failure to build a token stream from a fetched page is fatal.
sub scan_last_month {
  use LWP::UserAgent;
  my $ua = LWP::UserAgent->new();
  my $url_format =
    'http://freshair.npr.org/dayFA.cfm?todayDate=%02d%%2f%02d%%2f%04d';

  foreach my $mdy (weekdays_last_month()) {
    # Each $mdy is [month, day, four-digit year], matching the format above.
    my $url = sprintf($url_format, @$mdy);
    DEBUG and print "Getting @$mdy URL $url\n";

    sleep 3; # Pause before every request so we don't hammer the NPR server.
    my $reply = $ua->get($url);

    if($reply->is_success) {
      my $stream = HTML::TokeParser->new($reply->content_ref);
      die "What, couldn't make a stream?!" unless $stream;
      # $reply->base is handed on as the base for resolving relative links.
      parse_fresh_stream($stream, $reply->base);
    }
    else {
      print "Error getting $url: ", $reply->status_line, "\n";
    }
  }
}

# Return the weekdays (Monday through Friday) of the calendar month before
# a reference time, in chronological order.
#
# Parameters:
#   $now - optional epoch time to measure "last month" from; defaults to
#          the current time.  Passing a fixed value makes the result
#          reproducible.
#
# Returns a list of array refs, each [month (1-12), day of month,
# four-digit year].  All date arithmetic is done in UTC via gmtime.
sub weekdays_last_month {
  my($now) = @_;
  $now = time unless defined $now;
  my $one_day = 24 * 60 * 60;

  # Walk back a day at a time until gmtime reports a different month;
  # $now then falls on the final day of the previous month.  The condition
  # is true on entry, so at least one step is always taken.
  my $this_month = (gmtime $now)[4];
  $now -= $one_day while (gmtime $now)[4] == $this_month;
  my $last_month = (gmtime $now)[4];

  # Keep walking back through that month, collecting non-weekend days.
  # unshift (rather than push) leaves the list oldest-first.
  my @out;
  while(1) {
    my($mday, $mon, $year, $wday) = (gmtime $now)[3, 4, 5, 6];
    last unless $mon == $last_month;
    unshift @out, [$mon + 1, $mday, $year + 1900] # m,d,yyyy
      unless $wday == 0 or $wday == 6;            # 0 = Sunday, 6 = Saturday
    $now -= $one_day;
  }
  return @out;
}

# Scan one Fresh Air listing page for links to RealAudio (.ram) segments
# and print each segment's description followed by its absolute URL.
#
# Parameters:
#   $stream   - an HTML::TokeParser object to read tokens from
#   $base_url - URL against which relative href values are resolved
#
# For every <a> whose href resolves to an http URL on host www.npr.org
# with a path matching /ramfiles/....ram, and whose link text starts with
# "Listen to ", prints "<text minus that prefix>\n  <url>\n" to STDOUT.
# Returns nothing.
sub parse_fresh_stream {
  use URI;
  my($stream, $base_url) = @_;
  DEBUG and print "About to parse stream with base $base_url\n";

  $stream->{'textify'} = {};
   # to nix interpolation of the alt values on images

  while(my $a_tag = $stream->get_tag('a')) {
    DEBUG > 1 and printf "Considering {%s}\n", $a_tag->[3];
    # Anchors without an href (e.g. <a name=...>) are skipped outright.
    my $url = URI->new_abs( ($a_tag->[1]{'href'} || next), $base_url);
    unless(($url->scheme || '') eq 'http') {
      DEBUG > 1 and print "Scheme is no good in $url\n";
      next;
    }
    # Compare the whole host name.  An unanchored pattern match would also
    # accept look-alikes such as "www.npr.org.example.com".  Host names are
    # case-insensitive, hence the lc().
    my $host = $url->host;
    unless(defined $host and lc($host) eq 'www.npr.org') {
      DEBUG > 1 and print "Host is no good in $url\n";
      next;
    }
    unless($url->path =~ m{/ramfiles/.*\.ram$}) {
      DEBUG > 1 and print "Path is no good in $url\n";
      next;
    }
    DEBUG > 1 and print "IT'S GOOD!\n";
    my $text = $stream->get_trimmed_text('/a') || "??";

    # Only links labelled "Listen to ..." are reported; the prefix is
    # stripped so just the description remains.
    unless($text =~ s/^Listen to //) {
      DEBUG > 1 and print "Odd, \"$text\" doesn't start with \"Listen to\"...\n";
      next;
    }

    printf "%s\n  %s\n", $text, $url;
  }
  DEBUG and print "End of stream\n";
  return;
}

__END__

Example output:

Monday - June 3, 2002
  http://www.npr.org/ramfiles/fa/20020603.fa.ram
Political Columnist David Newman
  http://www.npr.org/ramfiles/fa/20020603.fa.01.ram
Gino Yevdjevich
  http://www.npr.org/ramfiles/fa/20020603.fa.02.ram
Linguist Geoff Nunberg considers "journalese".
  http://www.npr.org/ramfiles/fa/20020603.fa.03.ram
Tuesday - June 4, 2002
  http://www.npr.org/ramfiles/fa/20020604.fa.ram
Comic book writer Stan Lee
  http://www.npr.org/ramfiles/fa/20020604.fa.01.ram
Book critic Maureen Corrigan
  http://www.npr.org/ramfiles/fa/20020604.fa.02.ram
Wednesday - June 5, 2002
  http://www.npr.org/ramfiles/fa/20020605.fa.ram
Directors Chris and Paul Weitz
  http://www.npr.org/ramfiles/fa/20020605.fa.01.ram
Jazz critic Kevin Whitehead
  http://www.npr.org/ramfiles/fa/20020605.fa.02.ram
Thursday - June 6, 2002
  http://www.npr.org/ramfiles/fa/20020606.fa.ram
Photographer and reporter Scott Peterson
  http://www.npr.org/ramfiles/fa/20020606.fa.01.ram
Robert Jay Lifton
  http://www.npr.org/ramfiles/fa/20020606.fa.02.ram
Friday - June 7, 2002
  http://www.npr.org/ramfiles/fa/20020607.fa.ram
Writer Michael Pollan
  http://www.npr.org/ramfiles/fa/20020607.fa.01.ram
Rock critic Ken Tucker
  http://www.npr.org/ramfiles/fa/20020607.fa.02.ram
Film critic John Powers
  http://www.npr.org/ramfiles/fa/20020607.fa.03.ram
Monday - June 10, 2002
  http://www.npr.org/ramfiles/fa/20020610.fa.ram
Writer John Ridley
  http://www.npr.org/ramfiles/fa/20020610.fa.01.ram
Music critic Milo Miles
  http://www.npr.org/ramfiles/fa/20020610.fa.02.ram
Tuesday - June 11, 2002
  http://www.npr.org/ramfiles/fa/20020611.fa.ram
Raja Shehadeh is a Palestinian lawyer and writer whose latest book is...
  http://www.npr.org/ramfiles/fa/20020611.fa.01.ram
Writer Michael Oren
  http://www.npr.org/ramfiles/fa/20020611.fa.02.ram
Linguist Geoffrey Nunberg
  http://www.npr.org/ramfiles/fa/20020611.fa.03.ram
Wednesday - June 12, 2002
  http://www.npr.org/ramfiles/fa/20020612.fa.ram
National security expert Loch Johnson
  http://www.npr.org/ramfiles/fa/20020612.fa.01.ram
Rev. Roy Hawthorne
  http://www.npr.org/ramfiles/fa/20020612.fa.02.ram
Classical music critic Lloyd Schwartz
  http://www.npr.org/ramfiles/fa/20020612.fa.03.ram
Thursday - June 13, 2002
  http://www.npr.org/ramfiles/fa/20020613.fa.ram
Actor Nicolas Cage
  http://www.npr.org/ramfiles/fa/20020613.fa.01.ram
Friday - June 14, 2002
  http://www.npr.org/ramfiles/fa/20020614.fa.ram
Crime novelist Dennis Lehane
  http://www.npr.org/ramfiles/fa/20020614.fa.01.ram
Los Angeles Lakers coach, Phil Jackson
  http://www.npr.org/ramfiles/fa/20020614.fa.02.ram
Painter Larry Rivers
  http://www.npr.org/ramfiles/fa/20020614.fa.03.ram
Film critic Henry Sheehan
  http://www.npr.org/ramfiles/fa/20020614.fa.04.ram
Monday - June 17, 2002
  http://www.npr.org/ramfiles/fa/20020617.fa.ram
Actress Jodie Foster
  http://www.npr.org/ramfiles/fa/20020617.fa.01.ram
Rock historian Ed Ward
  http://www.npr.org/ramfiles/fa/20020617.fa.02.ram
Tuesday - June 18, 2002
  http://www.npr.org/ramfiles/fa/20020618.fa.ram
Science writer Douglas Starr
  http://www.npr.org/ramfiles/fa/20020618.fa.01.ram
David Bianculli
  http://www.npr.org/ramfiles/fa/20020618.fa.02.ram
Wednesday - June 19, 2002
  http://www.npr.org/ramfiles/fa/20020619.fa.ram
Actor Matt Damon
  http://www.npr.org/ramfiles/fa/20020619.fa.01.ram
Rock critic Ken Tucker
  http://www.npr.org/ramfiles/fa/20020619.fa.02.ram
Thursday - June 20, 2002
  http://www.npr.org/ramfiles/fa/20020620.fa.ram
Stand up comics Ahmed Ahmed and Maz Jobrani
  http://www.npr.org/ramfiles/fa/20020620.fa.01.ram
Pakistani dancer Sheema Kermani
  http://www.npr.org/ramfiles/fa/20020620.fa.02.ram
Rock critic Ken Tucker
  http://www.npr.org/ramfiles/fa/20020620.fa.03.ram
Friday - June 21, 2002
  http://www.npr.org/ramfiles/fa/20020621.fa.ram
Writer Barry Hannah
  http://www.npr.org/ramfiles/fa/20020621.fa.01.ram
Rock historian Ed Ward
  http://www.npr.org/ramfiles/fa/20020621.fa.02.ram
We remember poet and essayist June Jordan.
  http://www.npr.org/ramfiles/fa/20020621.fa.03.ram
Film critic John Powers
  http://www.npr.org/ramfiles/fa/20020621.fa.04.ram
Monday - June 24, 2002
  http://www.npr.org/ramfiles/fa/20020624.fa.ram
Boxer Laila Ali
  http://www.npr.org/ramfiles/fa/20020624.fa.01.ram
Actress Cherry Jones
  http://www.npr.org/ramfiles/fa/20020624.fa.02.ram
Book critic Maureen Corrigan
  http://www.npr.org/ramfiles/fa/20020624.fa.03.ram
Tuesday - June 25, 2002
  http://www.npr.org/ramfiles/fa/20020625.fa.ram
Boston Globe reporters Walter Robinson and Mike Rezendes
  http://www.npr.org/ramfiles/fa/20020625.fa.01.ram
Human rights leader Jeri Laber
  http://www.npr.org/ramfiles/fa/20020625.fa.02.ram
Music critic Milo Miles
  http://www.npr.org/ramfiles/fa/20020625.fa.03.ram
Wednesday - June 26, 2002
  http://www.npr.org/ramfiles/fa/20020626.fa.ram
Terrorism Expert Rohan Gunaratna
  http://www.npr.org/ramfiles/fa/20020626.fa.01.ram
Writer James Gavin
  http://www.npr.org/ramfiles/fa/20020626.fa.02.ram
Thursday - June 27, 2002
  http://www.npr.org/ramfiles/fa/20020627.fa.ram
Musician and actor Steve Van Zandt
  http://www.npr.org/ramfiles/fa/20020627.fa.01.ram
Columnist Robert Wolke
  http://www.npr.org/ramfiles/fa/20020627.fa.02.ram
Friday - June 28, 2002
  http://www.npr.org/ramfiles/fa/20020628.fa.ram
Today marks the centennial of the birth of composer Richard Rodgers
  http://www.npr.org/ramfiles/fa/20020628.fa.01.ram


