#!/usr/bin/perl
#
# fixlinks 0.2
# Copyright (c) 1997 by Christian Schwarz <schwarz@monet.m.isar.de>
# May by distributed under GPL 2.
#

# Specification:
#
# Processes HTML files specified on the command line and removes
# all references to non-existing local files. The output is written
# to the same files, while the old are renamed. 
#
# A link     <a href="foo.html">Foos and Bars</a>    is changed to
#         <!--REF:foo.html a href="foo.html"-->Foos and Bars<!--/a--> if the
# file foo.html does not exist, and vice versa.
#
# Note, that this program only handles HTML files correctly, that do
# comply with HTML syntax.

use Cwd;
use File::Basename;

package Parser; #-------------------------------
use HTML::Parser;
@ISA = qw(HTML::Parser);

sub declaration {
  my ($self, $decl) = @_;
  print ::OUT "<!$decl>";
}

sub start {
  my ($self, $tag, $attr, $attrseq, $origtext) = @_;

  my ($href);
  if (($tag eq 'a') and defined($href = $$attr{'href'}) and
      (not ($href =~ s/^(\S+:)//o) or ($1 =~ /file:/i))) {
    my $type = $1;
    $href =~ s/(\#.*)$//o;
    my $anchor = $1;
    #print "href: ($type,$href,$anchor)\n";

    # file exists?
    if (-f $href) {
      # yes, no change
    } else {
      # no! Change URL
      $origtext = "<!--REF:$href a";
      for $tag (@$attrseq) {
	if ($$attr{$tag}) {
	  $origtext .= " $tag=\"$$attr{$tag}\"";
	} else {
	  $origtext .= " $tag";
	}
      }
      $origtext .= "-->";
      $comment_next = 1;
    }
  }

  print ::OUT $origtext;
}

sub end {
  my ($self, $tag) = @_;

  if (($tag eq 'a') and ($comment_next)) {
    print ::OUT "<!--ENDREF a-->";
    $comment_next = 0;
  } else {
    print ::OUT "</$tag>";
  }
}

sub text {
  my ($self, $text) = @_;
  print ::OUT $text;
}

sub comment {
  my ($self, $comment) = @_;

  if ($comment =~ /^REF:(\S+)\s+(.*)$/o) {
    if (-f $1) {
      # file exists!
      print ::OUT "<$2>";
      $uncomment_next = 1;
      return;
    } else {
      # file still doesn't exist, no change
    }
  } elsif ($uncomment_next and ($comment =~ s/^ENDREF\s+(.+)$//o)) {
    print ::OUT "</$1>";
    $uncomment_next = 0;
    return;
  }

  print ::OUT "<!--$comment-->";
}

#########################################################################

package main;

if ($#ARGV == -1) {
  print "usage: fixlinks <html file> ...\n";
  exit 1;
}

my $starting_dir = cwd;

my $p = Parser->new;

my $filename;
while ($filename = shift) {
  if ( ! -f $filename ) {
    print "error: file $filename not found, skipping.\n";
    next;
  }

  rename($filename,"$filename~") or die "cannot rename $filename: $!";

  open(OUT,">$filename") or die "cannot open output file $filename: $!";
  $rpath = dirname($filename);
  chdir($rpath) if $rpath;
  $p->parse_file(basename($filename)."~");
  close(OUT);
  unlink(basename($filename)."~");
  chdir($starting_dir);
}

exit 0;
