#!/usr/local/bin/perl5.8.0 -w

# comics to add:
#   Dilbert Parser:
#     The Other Coast         done
#     Marmaduke               done
#   King Features (not available):
#     Bizarro
#     Crankshaft
#     Zits
#     Funky Winkerbean
#     Mother Goose and Grimm
#     Baby Blues
#     Blondie
#     Curtis
#     Hagar
#     Judge Parker
#     Mark Trail
#     The Piranha Club
#     Sally Forth
#   Others:
#     Freefall                done
#     Bruno                   After 20060101
#     Little Dee              After 20060101
#     Kevin and Kell          done

use strict;
use warnings;
use IO::File;
use POSIX qw(strftime);
require LWP::UserAgent;

# Utility Functions

my $ua = LWP::UserAgent->new();

# Fetch $url with an HTTP GET.
#   $url           - address to fetch
#   $file_to_store - optional file name; when given it is handed to
#                    LWP::UserAgent::request so the body is saved there
# Returns the response content on success (per the LWP documentation the
# content is empty when the body was saved to a file), or 0 on failure
# after reporting the HTTP status on STDERR.
sub get_page {
    my ($url, $file_to_store) = @_;

    my $request = HTTP::Request->new("GET", $url);

    # Only pass the file argument when the caller supplied one.
    my $response = defined $file_to_store
                 ? $ua->request($request, $file_to_store)
                 : $ua->request($request);

    return $response->content() if $response->is_success;

    # Diagnostics go to STDERR so they never mix with regular output.
    warn "GET $url failed: ", $response->status_line, "\n";
    return 0;
}

# Per-site state: 'url' is the base address; 'page' (the front page text)
# is filled in further down, before any parser runs.
my %dilbert = ( 'url' => 'http://www.dilbert.com' );
my %ucomics = ( 'url' => 'http://www.ucomics.com' );

# A parser returns (url of the strip, url of the img) for a comic

# Name under which a comic appears in the site's OPTION list, when it
# differs from the name used in @comics.
my %alt_name = ( 'Dilbert' => 'Dilbert.com' );

# Prefix $url with the site's base url unless it already starts with "http".
sub _absolute_url {
    my ($site, $url) = @_;
    return $url =~ /^http/i ? $url : $site->{url} . $url;
}

# Find the strip page of $comic in the OPTION list of the site's front page.
# The comic name is quoted with \Q..\E so that names such as 'B.C.' are
# matched literally instead of as a pattern.
# Returns the absolute url, or nothing when the comic is not listed.
sub _strip_page_url {
    my ($site, $comic) = @_;
    my ($url) = $site->{page} =~ /OPTION VALUE="(\S*)">\s*\Q$comic\E/i;
    $url or return;
    return _absolute_url($site, $url);
}

# Parser for comics listed on the %dilbert site.
# Returns ($strip_page_url, $image_url), or nothing when either is missing.
sub dilbert_parser {
    my ($comic) = @_;
    $comic = $alt_name{$comic} if exists $alt_name{$comic};

    my $url = _strip_page_url(\%dilbert, $comic) or return;

    my $buf = get_page($url);
    $buf =~ /IMG\s*SRC=\S*["'](\S*\d\d\d\d\d\d\d+\.(?:gif|jpg))["'][^>]*alt/i
        or return;
    my $img = $1;

    return ($url, _absolute_url(\%dilbert, $img));
}

# Parser for comics listed on the %ucomics site.
# Returns ($strip_page_url, $image_url), or nothing when either is missing.
sub ucomics_parser {
    my ($comic) = @_;

    my $url = _strip_page_url(\%ucomics, $comic) or return;

    my $buf = get_page($url);
    $buf =~ /IMG\s*SRC=\S*"(\S*\/\S+\d\d\d+\.(?:gif|jpg))/i
        or return;
    my $img = $1;

    # Same treatment as dilbert_parser: a relative image url gets the site prefix.
    return ($url, _absolute_url(\%ucomics, $img));
}

my @sites = ( \%dilbert, \%ucomics );

# Flat list of (comic name => parser) pairs; kept as an array because the
# order decides the order of the strips on the page.
my @comics = (
    'Dilbert'                 => \&dilbert_parser,
    'Garfield'                => \&ucomics_parser,
#   'Peanuts'                 => \&dilbert_parser,
#   'Betty'                   => \&dilbert_parser,
#   'Nancy'                   => \&dilbert_parser,
    '9 Chickweed Lane'        => \&dilbert_parser,
    'Alley Oop'               => \&dilbert_parser,
    'Grand Ave'               => \&dilbert_parser,
    'Jane\'s World Classics'  => \&dilbert_parser,
    'Monty'                   => \&dilbert_parser,
    'Pibgorn'                 => \&dilbert_parser,
    'Drabble'                 => \&dilbert_parser,
    'Baldo'                   => \&ucomics_parser,
    'Calvin & Hobbes'         => \&ucomics_parser,
    'Cleats'                  => \&ucomics_parser,
    'Prickly City'            => \&ucomics_parser,
    'For Better or For Worse' => \&ucomics_parser,
    'Heart of the City'       => \&ucomics_parser,
    'Pluggers'                => \&ucomics_parser,
    'FoxTrot'                 => \&ucomics_parser,
    'Get Fuzzy'               => \&dilbert_parser,
    'Over the Hedge'          => \&dilbert_parser,
    'LuAnn'                   => \&dilbert_parser,
    'One Big Happy Classics'  => \&dilbert_parser,
    'Pearls'                  => \&dilbert_parser,
    'Red & Rover'             => \&dilbert_parser,
    'Rose is Rose'            => \&dilbert_parser,
    'State of the Union'      => \&dilbert_parser,
    'Wizard of Id'            => \&dilbert_parser,
#   'Frank and Ernest'=> \&dilbert_parser,
#   'Ziggy'                   => \&ucomics_parser,
#   'Herman'                  => \&dilbert_parser,
#   'Non Sequitur'            => \&ucomics_parser,
#   'Reality Check'           => \&dilbert_parser,
    'B.C.'                    => \&dilbert_parser,
#   'PC and Pixel'            => \&dilbert_parser,
#   'The Born Loser'          => \&dilbert_parser,
#   'Doonesbury'              => \&ucomics_parser,
    'Shoe'                    => \&ucomics_parser,
    'Tank McNamara'           => \&ucomics_parser,
    'Tom the D. Bug'          => \&ucomics_parser,
#   'Cathy'                   => \&ucomics_parser,
    #'Charlie',
    #'Ballard Street',
#   'Speed Bump'              => \&dilbert_parser,
#   'Jump Start'              => \&dilbert_parser,
    'Pickles'                 => \&dilbert_parser,
    'Chuck Asay'              => \&dilbert_parser,
    'The Other Coast'         => \&dilbert_parser,
    'Marmaduke'               => \&dilbert_parser,
    #'The 5th Wave',
);

# Fetch each site's front page once; a failed fetch leaves a placeholder
# that no comic name can match.
foreach my $site (@sites) {
    $site->{page} = get_page($site->{url}) || "NONE\n";
}

my $home = $ENV{HOME};
chdir(($^O =~ /Win32/) ? "d:/public_html" : "$home/public_html")
    or die "Could not chdir: $!";

my $time = time();

# The archive page is named after today's date. It is only written when it
# does not exist yet; $file stays undefined otherwise, which also disables
# the image archiving in the loop below.
my $date = strftime("%m%d%Y", localtime($time));
my $archive_page = "comics-archive/comics-$date.html";
my $file;

unless (-e $archive_page) {
    $file = IO::File->new;
    open($file, '>', $archive_page)
        or die "could not open $archive_page: $!";
}

my $html_str = "";

# Append the arguments (joined by spaces) to the page text and, when an
# archive page is being written, print them to that file as well.
sub output {
    $html_str .= "@_";
    print {$file} "@_" if $file;
}

my $now = localtime($time);

# NOTE(review): the literals of this statement contain only bare text and
# newlines; any markup they may once have held is not present in the file.
# They are kept exactly as found - confirm against the original script.
output " Comic Strips ". "". "

". "
Comic Strips (Updated daily. This page: ". "$now PDT)". "
\n". "";

my @missing_comics = ();

# Walk the (name => parser) pairs two elements at a time.
for (my $i = 0; $i < $#comics; $i += 2) {
    my ($comic, $fn) = @comics[$i, $i + 1];

    my ($url, $img) = $fn->($comic);
    if (!$img) {
        # No image found: remember the comic for the "Missing" report.
        push @missing_comics, $comic;
        next;
    }

    # debug
    #print "XXXX: $url $img\n";

    # NOTE(review): this row emits only a newline. Fragments that reference
    # $comic appear further down the file, outside this loop, which suggests
    # the row's content was displaced - confirm against the original script.
    output "\n";

    if ($file) {
        # Keep a dated copy of the image, one directory per comic.
        my $dir = "comics-archive-images/$comic";
        -d $dir or mkdir($dir) or warn "could not create $dir: $!\n";

        my $img_name = (split m{/}, $img)[-1];
        $img_name =~ s/\d+\././;    # drop the digits in front of the extension
        get_page($img, "$dir/$date-$img_name");
    }
}

##include Jaimie Hollenback's comic as he requested:
#$html_str .= "\n";

##################################################
#include Dr. Fun:
$html_str .= "\n";

##################################################
#include Sherman's Lagoon:
my $sl_date = strftime("%y%m%d", localtime($time));
# NOTE(review): the statement below is continued by the following lines of the file.
my $sl_img_url = ' Shermans Lagoon
'. ''. $sl_img_url . ' alt="(c) Copyright by Jim Toomey. www.slagoon.com">'. "

\n"; ################################################## #include Day By Day: my $dbd_date = strftime("%m%d%y", localtime($time)); my $dbd_img_url = ' Day By Day
'. ''. $dbd_img_url . ' alt="(c) Copyright by Chris Muir. www.daybydaycartoon.com">'. "

\n"; ################################################## #include Kevin and Kell: my $kak_year = strftime("%Y", localtime($time)); my $kak_date = strftime("%Y%m%d", localtime($time)); my $kak_img_url = ' Kevin and Kell
'. ''. $kak_img_url . ' alt="(c) Copyright by Bill Holbrook. www.kevinandkell.com">'. "

\n"; ################################################## #include Freefall: $html_str .= "\n"; ################################################## output "
". " $comic
". "\"$comic\"". "

". # ' Kim & Jason
'. # ''. # '(c) Copyright by Jason Kotecki. www.KimandJason.com'. # "

". ' Dr. Fun
'. ''. '(c) Copyright by David Farley. www.ibiblio.org'. "

". ' Freefall
'. ''. 'Click Here' . ' alt="Freefall, http://freefall.purrsia.com">'. "



". ((@missing_comics) ? "Missing: @missing_comics
\n" : ''); $html_str .= " Yesterday's page. "; if ($file) { print $file " Next Prev"; } #output "
Source Code. ". # 'Mail me any comics you want included here.
'. # " Raghu Angadi ". # " \n"; output "
Original Source Code by Raghu Angadi \n"; output "
Modified by Mark Shaw \n". " \n"; open(OUT, "> comics.html") or die "Could not open comics.html: $!"; print OUT $html_str;