#!/usr/local/bin/perl5.8.0 -w

# comics to add:
#   Dilbert Parser:
#     The Other Coast            done
#     Marmaduke                  done
#   King Features (not available):
#     Bizarro
#     Crankshaft
#     Zits
#     Funky Winkerbean
#     Mother Goose and Grimm
#     Baby Blues
#     Blondie
#     Curtis
#     Hagar
#     Judge Parker
#     Mark Trail
#     The Piranha Club
#     Sally Forth
#   Others:
#     Freefall                   done
#     Bruno                      After 20060101
#     Little Dee                 After 20060101
#     Kevin and Kell             done

use strict;
use IO::File;
use POSIX qw(strftime);
require LWP::UserAgent;

# Utility Functions

# Single user agent shared by every request made through get_page().
my $ua = LWP::UserAgent->new();

# get_page($url, $file_to_store)
#
# Issues a GET request for $url.  $file_to_store is optional and is handed
# straight to LWP::UserAgent::request as its second argument (per the LWP
# documentation that makes LWP save the body to that file instead of keeping
# it in the response object).
#
# Returns the response content on success.  On failure the error is printed
# to STDOUT as HTML and 0 is returned, so callers can test the result for
# truth.
sub get_page {
    my ($url, $file_to_store) = @_;

    my $request  = HTTP::Request->new("GET", $url);
    my $response = $ua->request($request, $file_to_store);

    if ($response->is_success) {
        return $response->content();
    }
    else {
        print $response->error_as_HTML;
        return 0;
    }
}

# Per-site records.  'url' is the site's base URL; a 'page' entry holding the
# fetched front page is added further down, before any parser is called.
my %dilbert = ( 'url' => 'http://www.dilbert.com' );
my %ucomics = ( 'url' => 'http://www.ucomics.com' );

# A parser returns (url of the strip, url of the img) for a comic

# Comics whose label on the site differs from the name used in @comics.
my %alt_name = ( 'Dilbert' => 'Dilbert.com' );

# dilbert_parser($comic)
#
# Looks up $comic (or its %alt_name replacement) among the OPTION entries of
# the prefetched dilbert.com page, fetches the page that entry points to and
# extracts the strip image from it.
#
# Returns ($url, $img), both made absolute against the site URL when they do
# not already start with "http".  Returns an empty list when either the
# OPTION entry or the image cannot be found.
sub dilbert_parser {
    my ($comic) = @_;

    defined($alt_name{$comic}) and $comic = $alt_name{$comic};
    #debug
    #print "YYY $comic\n";

    # \Q...\E: the comic name is literal text, not a pattern.  Without the
    # quoting, the dots in names such as 'B.C.' or 'Dilbert.com' would match
    # any character.
    my ($url) = $dilbert{page} =~ /OPTION VALUE="(\S*)">\s*\Q$comic\E/i;
    $url or return;
    #debug
    #print "XXX $url\n";

    $url = $dilbert{url} . $url if $url !~ /^http/i;

    # get_page() returns 0 on failure; the match below then simply fails.
    my $buf = get_page($url);
    $buf =~ /IMG\s*SRC=\S*["'](\S*\d\d\d\d\d\d\d+\.(?:gif|jpg))["'][^>]*alt/i
        or return;

    my $img = $1;
    $img = $dilbert{url} . $img if $img !~ /^http/i;

    return ($url, $img);
}

# ucomics_parser($comic)
#
# Same lookup as dilbert_parser() but against the prefetched ucomics.com page
# and with that site's image pattern.
#
# Returns ($url, $img) or an empty list when nothing matches.  Unlike
# dilbert_parser(), the image URL is returned exactly as captured; it is not
# prefixed with the site URL.
sub ucomics_parser {
    my ($comic) = @_;

    # \Q...\E: match the comic name literally (e.g. 'Tom the D. Bug').
    my ($url) = $ucomics{page} =~ /OPTION VALUE="(\S*)">\s*\Q$comic\E/i;
    $url or return;

    $url = $ucomics{url} . $url if $url !~ /^http/i;

    my $buf = get_page($url);
    $buf =~ /IMG\s*SRC=\S*"(\S*\/\S+\d\d\d+\.(?:gif|jpg))/i or return;

    return ($url, $1);
}

# Every site whose front page has to be fetched before the parsers run.
my @sites = ( \%dilbert, \%ucomics );

# Flat, ordered list of (comic name => parser) pairs.  This is an array, not
# a hash, so the order written here is preserved.  Commented-out entries are
# comics that are currently disabled.
my @comics = (
    'Dilbert'                  => \&dilbert_parser,
    'Garfield'                 => \&ucomics_parser,
#   'Peanuts'                  => \&dilbert_parser,
#   'Betty'                    => \&dilbert_parser,
#   'Nancy'                    => \&dilbert_parser,
    '9 Chickweed Lane'         => \&dilbert_parser,
    'Alley Oop'                => \&dilbert_parser,
    'Grand Ave'                => \&dilbert_parser,
    'Jane\'s World Classics'   => \&dilbert_parser,
    'Monty'                    => \&dilbert_parser,
    'Pibgorn'                  => \&dilbert_parser,
    'Drabble'                  => \&dilbert_parser,
    'Baldo'                    => \&ucomics_parser,
    'Calvin & Hobbes'          => \&ucomics_parser,
    'Cleats'                   => \&ucomics_parser,
    'Prickly City'             => \&ucomics_parser,
    'For Better or For Worse'  => \&ucomics_parser,
    'Heart of the City'        => \&ucomics_parser,
    'Pluggers'                 => \&ucomics_parser,
    'FoxTrot'                  => \&ucomics_parser,
    'Get Fuzzy'                => \&dilbert_parser,
    'Over the Hedge'           => \&dilbert_parser,
    'LuAnn'                    => \&dilbert_parser,
    'One Big Happy Classics'   => \&dilbert_parser,
    'Pearls'                   => \&dilbert_parser,
    'Red & Rover'              => \&dilbert_parser,
    'Rose is Rose'             => \&dilbert_parser,
    'State of the Union'       => \&dilbert_parser,
    'Wizard of Id'             => \&dilbert_parser,
#   'Frank and Ernest'=> \&dilbert_parser,
#   'Ziggy'                    => \&ucomics_parser,
#   'Herman'                   => \&dilbert_parser,
#   'Non Sequitur'             => \&ucomics_parser,
#   'Reality Check'            => \&dilbert_parser,
    'B.C.'                     => \&dilbert_parser,
#   'PC and Pixel'             => \&dilbert_parser,
#   'The Born Loser'           => \&dilbert_parser,
#   'Doonesbury'               => \&ucomics_parser,
    'Shoe'                     => \&ucomics_parser,
    'Tank McNamara'            => \&ucomics_parser,
    'Tom the D. Bug'           => \&ucomics_parser,
#   'Cathy'                    => \&ucomics_parser,
    #'Charlie',
    #'Ballard Street',
#   'Speed Bump'               => \&dilbert_parser,
#   'Jump Start'               => \&dilbert_parser,
    'Pickles'                  => \&dilbert_parser,
    'Chuck Asay'               => \&dilbert_parser,
    'The Other Coast'          => \&dilbert_parser,
    'Marmaduke'                => \&dilbert_parser,
    #'The 5th Wave',
);

# Prefetch each site's front page.  When the fetch fails, store a placeholder
# so the parsers' pattern matches fail cleanly instead of hitting undef.
foreach my $site (@sites) {
    $site->{page} = get_page($site->{url}) or $site->{page} = "NONE\n";
}

# All output paths below are relative to the public_html directory.
my $home = $ENV{HOME};
chdir(($^O =~ /Win32/) ? "d:/public_html" : "$home/public_html")
    or die "Could not chdir: $!";

my $time = time();

#check for yesterdays page
# NOTE(review): $date is built from the current time, so this names today's
# archive page, not yesterday's -- confirm which one is intended.
my $date = strftime("%m%d%Y", localtime($time));
my $archive_page = "comics-archive/comics-$date.html";

# $file stays undefined when the archive page already exists, in which case
# output() only accumulates into $html_str and never rewrites the archive.
my $file;
unless (-e $archive_page) {
    $file = IO::File->new;
    # Three-argument open: the file name is never parsed for mode characters.
    open($file, '>', $archive_page)
        or die "could not open $archive_page: $!";
}

my $html_str = "";

# output(@strings)
#
# Appends the arguments (joined by the list separator, as "@_" does) to
# $html_str and, when an archive file was opened above, prints the same text
# to that file.
sub output {
    $html_str .= "@_";
    print {$file} "@_" if $file;
}

my $now = localtime($time);

# The string literal opened here continues on the following lines.
output "
| ".
" $comic ". " | |
| ".
# ' Kim & Jason '. # ''. # ' '.
# " | |
| ".
' Dr. Fun '. ''. ' '.
" | |
| ".
' Freefall '. ''. 'Click Here' . ' alt="Freefall, http://freefall.purrsia.com">'. " |