#!/data/asictools/bin/perl -w

=head1 NAME

HtmlDiff - provides Winword-style change formatting for text, based on diff(1)

=head1 SYNOPSIS

	HtmlDiff [OPTIONS]  <OldFile> <NewFile>

=head1 OPTIONS

	-h		show this documentation and exit
	-t		treat the files as plain text instead of HTML
	-n		spawn netscape on the result
	-w		ignore white space (this is the same switch as for diff(1))
	-s <String>	insert <String> at the beginning of every difference, so that differences can be located with the 'find' function of an editor or browser
	-S <String>	separator for words (diffing per line requires <String>=\\n, diffing single characters requires <String>=.)


=head1 DESCRIPTION

This tool enhances diff(1) with colored markings. It compares per word (instead of per line).
It can hand its result to netscape for display.

=head1 CAVEATS

The HTML mode is alpha quality. In the worst case, text may be deleted from the output.

=head1 AUTHOR

B.Weiler, Siemens ICN TR ON EA, 2.99

=head1 PREREQUISITES

diff(1)
Pod::Usage

=head1 COREQUISITES

netscape

=head1 OSNAMES

This script is known to work on C<Solaris 2.5.1>

=head1 SCRIPT CATEGORIES

CPAN

=cut

require 5.002;
#use IgnoreUserlocal;
use English;
use strict;
use Pod::Usage;

# ---------------------------------------------------------------------------
# Settings (with their defaults) and command line evaluation.
# ---------------------------------------------------------------------------
my $TM=0;            # 1 => -t given: inputs are plain text instead of HTML
my $OldFile;         # first positional argument
my $NewFile;         # second positional argument
my $Netscape=0;      # 1 => -n given: show the result in netscape
my $DiffArgs='';     # extra switch handed to diff(1); set to '-w' by -w
my $String;          # marker text given with -s
# Markup that is wrapped around removed ("Old") and added ("New") parts.
my %Style=(
  StartOld => "<STRIKE><I><FONT COLOR=\"blue\">",
  EndOld => "</FONT></I></STRIKE>",
  StartNew => "<U><B><FONT COLOR=\"red\">",
  EndNew => "</FONT></B></U>",
  );
my $SplitRegexp=" "; # pattern given to split() to break a line into words (-S)

# Report a command line error together with the SYNOPSIS and leave with
# exit status 2 (pod2usage does not return in this form).
my $UsageError=sub{
  pod2usage(-message => "HtmlDiff: $_[0]", -exitval => 2, -verbose => 0);
};

while(@ARGV){
  my $Arg=shift @ARGV;
  if($Arg eq '-h'){
    pod2usage(-verbose => 2);
    exit;
  }
  elsif($Arg eq '-t'){
    $TM=1;
  }
  elsif($Arg eq '-w'){
    $DiffArgs='-w';
  }
  elsif($Arg eq '-n'){
    $Netscape=1;
  }
  elsif($Arg eq '-s'){
    $String=shift @ARGV;
    $UsageError->('option -s requires a <String> argument') unless(defined $String);
    # The marker is appended to both opening markups, so it precedes every
    # marked difference in the output.
    $Style{StartNew}.=$String;
    $Style{StartOld}.=$String;
  }
  elsif($Arg eq '-S'){
    $SplitRegexp=shift @ARGV;
    $UsageError->('option -S requires a <String> argument') unless(defined $SplitRegexp);
  }
  else{
    # Anything else is taken as <OldFile>; the argument following it is
    # <NewFile>. Options may still follow the file names.
    $OldFile=$Arg;
    $NewFile=shift @ARGV;
  }
}

# Without both file names the rest of the script cannot do anything useful.
$UsageError->('<OldFile> and <NewFile> are required')
  unless(defined $OldFile and defined $NewFile);

# ---------------------------------------------------------------------------
# Preparation: both input files are rewritten as temporary files holding one
# token per line, so that diff(1) effectively compares tokens, not lines.
# ---------------------------------------------------------------------------

# _TokenizeFile(Source, Target) - split Source into tokens, write them to Target.
#
#   Source  path of the file to read
#   Target  path of the token file to create (overwritten if it exists)
#
# Every input line is split with $SplitRegexp. In plain-text mode ($TM) the
# line is HTML-escaped first and a "<br>" token is appended to it.
# A token that has a '<' with no '>' in front of it opens a tag group; the
# following tokens are joined with blanks until a token has a '>' with no '<'
# behind it, and the whole group is then written as one line. All other tokens
# are written one per line.
# Dies with a message if a file cannot be opened or written.
sub _TokenizeFile{
  my($Source,$Target)=@_;
  # 3-argument open: the file names are user input and must not be
  # interpreted as open modes or pipes.
  open(my $In,'<',$Source) or die "HtmlDiff: cannot read '$Source': $!\n";
  open(my $Out,'>',$Target) or die "HtmlDiff: cannot write '$Target': $!\n";
  my $InTag=0;     # 1 while the tokens of a tag group are being collected
  my $Pending='';  # tokens of the tag group collected so far
  while(my $Line=<$In>){
    if($TM){
      $Line=~s|<|&lt;|g;
      $Line=~s|>|&gt;|g;
      $Line=~s|\t|      |g;
      # Each run of blanks becomes one blank followed by three &nbsp; per
      # blank of the run.
      $Line=~s| ( *)|" "."&nbsp;&nbsp;&nbsp;" x length($&)|eg;
    }
    my @Tokens=split($SplitRegexp,$Line);
    push(@Tokens,"<br>") if($TM);
    foreach my $Token(@Tokens){
      if($Token=~/^[^>]*</){
        $Pending='';
        $InTag=1;
      }
      if($InTag){
        $Pending.=' '.$Token;
        # Flush only while a group is open. Without this condition a token
        # that merely contains '>' wrote the previous (stale or undefined)
        # group a second time.
        if($Token=~/>[^<]*$/){
          $InTag=0;
          print $Out "$Pending\n";
        }
      }
      else{
        print $Out "$Token\n";
      }
    }
  }
  close($Out) or die "HtmlDiff: cannot write '$Target': $!\n";
  close($In);
}

# NOTE(review): the token file names are predictable (/tmp plus process id);
# consider File::Temp if this runs on a shared machine.
_TokenizeFile($OldFile,"/tmp/h1_$$");
$OldFile="/tmp/h1_$$";
_TokenizeFile($NewFile,"/tmp/h2_$$");
$NewFile="/tmp/h2_$$";


# ---------------------------------------------------------------------------
# Main pass: run diff(1) on the two token files and merge its forward ed
# script (diff -f) with the old token list into one marked-up HTML stream.
# ---------------------------------------------------------------------------

# Tokens of the old file. Element 0 is a dummy, so that the indices equal the
# 1-based line numbers used in the diff output.
my @OldLines=('');
open(my $OldFH,'<',$OldFile) or die "HtmlDiff: cannot read '$OldFile': $!\n";
while(my $Token=<$OldFH>){
  chomp $Token;
  push(@OldLines,$Token);
  warn"unclosed or unopened tag detected, malfunction warning: $Token" if($Token=~/<[^>]*$|^[^<]*>/);
}
close($OldFH);

# List form of the pipe open: no shell is involved, the arguments are handed
# to diff(1) as they are. $DiffArgs is only passed when it is not empty.
open(my $Diff,'-|','diff','-f',(length($DiffArgs) ? $DiffArgs : ()),$OldFile,$NewFile)
  or die "HtmlDiff: cannot run diff(1): $!\n";

#no strict 'ref';
# Output goes to STDOUT, or with -n to a file that is shown in netscape.
# $FH is the handle PrintLine() writes to.
my $TmpFile="$ENV{HOME}/.HtmlDiff";
my $FH;
if($Netscape){open($FH,'>',$TmpFile) or die "HtmlDiff: cannot write '$TmpFile': $!\n"}
else{$FH=\*STDOUT}

print $FH "<html><BODY BGCOLOR=white>\n" ;
# Legend. The sample for new parts is closed with EndNew (it used to be closed
# with EndOld, which left mismatched tags in the output).
print $FH "<i>How to read this <b>diff</b>:</i>
  <ul>
    <li>removed old parts are marked as $Style{StartOld}this one$Style{EndOld}
    <li>new parts are marked as $Style{StartNew}this one$Style{EndNew}
  </ul>";
print $FH "<BLINK>Mode=HTML: this mode is experimental, errors are probable</BLINK>" unless($TM);
print $FH "  <hr>
  ";

my $Next=0;  # index of the first old token that has not been written yet
while(my $Cmd=<$Diff>){
  if($Cmd=~/^([cd])(\d+)(?:\s+(\d+))?$/){
    # "dN [M]" deletes, "cN [M]" changes the old tokens N..M (M defaults to N).
    # The numbers are copied at once, because later matches overwrite $1..$3.
    my($Op,$First,$Last)=($1,$2,defined $3 ? $3 : $2);
    for(my $i=$Next;$i<$First;$i++){PrintLine('Equal',$OldLines[$i])}
    for(my $i=$First;$i<=$Last;$i++){PrintLine('Old',$OldLines[$i])}
    if($Op eq 'c'){
      # The replacement tokens follow, terminated by a line holding a dot.
      while(defined(my $New=<$Diff>)){
        last if($New=~/^\.$/);
        PrintLine('New',$New);
      }
    }
    $Next=$Last+1;
  }
  elsif($Cmd=~/^a(\d+)$/){
    # "aN" appends the following tokens (up to the dot line) after old token N.
    my $After=$1;
    for(my $i=$Next;$i<=$After;$i++){PrintLine('Equal',$OldLines[$i])}
    while(defined(my $New=<$Diff>)){
      last if($New=~/^\.$/);
      PrintLine('New',$New);
    }
    $Next=$After+1;
  }
}
# diff(1) exits with 1 when the files differ; only higher values mean trouble.
close($Diff);
warn "HtmlDiff: diff(1) reported trouble (exit status ".($?>>8).")\n" if(($?>>8)>1);

# Whatever is left of the old tokens is unchanged.
for(my $i=$Next;$i<=$#OldLines;$i++){PrintLine('Equal',$OldLines[$i])}
unlink($OldFile,$NewFile);
print $FH "<hr>";
close($FH) or warn "HtmlDiff: error while closing the output: $!\n";
if($Netscape){
  # First try to reuse a running netscape; start a new one if that fails.
  system("netscape -remote 'openURL(file:$TmpFile)'");
  if($?){
    # InfoTextArea() is not defined in this file; it is used if something
    # else provides it, otherwise the hint goes to STDERR instead of the
    # script dying on an undefined subroutine.
    if(defined &InfoTextArea){InfoTextArea('Please quit the HTML viewer after reading.')}
    else{warn "Please quit the HTML viewer after reading.\n"}
    system("netscape -ncols 10 $TmpFile &");
  }
}
exit 0;

# PrintLine(Mode, Line) - write one token to the output handle $FH.
#
#   Mode  'Equal'  the token is written unchanged
#         'Old'    all tags are removed, the rest is wrapped in
#                  $Style{StartOld} / $Style{EndOld}
#         'New'    opening <font ...> tags are removed and the token is
#                  wrapped in $Style{StartNew} / $Style{EndNew}; if it
#                  contains <td>, the opening markup is placed behind each
#                  <td> instead of in front of the token
#   Line  the token; one trailing newline is removed
#
# Dies on any other Mode.
# No prototype is declared: every call in this file comes before this
# definition, so a prototype is never applied and only causes "called too
# early to check prototype" warnings under -w.
sub PrintLine{
  my($Mode,$Line)=@_;
  $Line='' unless(defined $Line);
  chomp $Line;
  if($Mode eq 'New'){
    $Line=~s|<font.*?>||ig;
    if($Line=~/<td>/i){
      $Line=~s|<td>|<TD>$Style{StartNew}|ig;
      print {$FH} "$Line$Style{EndNew}\n";
    }
    else{
      print {$FH} "$Style{StartNew}$Line$Style{EndNew}\n";
    }
  }
  elsif($Mode eq 'Old'){
    # '>' is excluded from the tag body as well: otherwise a tag followed by
    # text and a literal '>' is removed together with that text.
    $Line=~s|<[^<>]+>||g;
    print {$FH} "$Style{StartOld}$Line$Style{EndOld}\n";
  }
  elsif($Mode eq 'Equal'){
    print {$FH} "$Line\n";
  }
  else{
    die"Illegal Mode for PrintLine: $Mode";
  }
}

# BreakWords() - placeholder without any behaviour: it takes no arguments
# and returns nothing.
sub BreakWords(){
  return;
}
