#!/usr/bin/perl -w

#   tarcust -- A Tar Customizer
#   Copyright (C) 1999-2001
#   Denis Barbier <barbier@debian.org>
#
#   New versions of this program are on the tarcust home page
#         http://www.engelschall.com/sw/tarcust/
#
#   This program is a free software released under the GNU GPL License;
#   see the LICENSE file for conditions.

require 5.000;
use strict;

use vars (qw($opt_a $opt_d $opt_e $opt_g $opt_h $opt_p $opt_u $opt_v
             $opt_B $opt_D $opt_E $opt_G $opt_U $opt_V
             @opt_s @opt_x
             $filein $fileout $dir_mode $longnames $header $header_line
             $total_size $new_values $contents $long_filename
             %list_dirs
));

#   The tar files format is quite simple. For each file, the archive
#   contains a header line of 512 bytes containing all informations
#   about this file followed by the contents of the file. After then
#   null chars are inserted so that each file size is a multiple of 512
#   bytes.
#   For files with a null size (e.g. directories, links, devices,...),
#   just the header is put in the file.

#   Default size of header line is 512.
my $blocksize = 512;

#   Structure of the header
#
#                          offset length  unpack   pack  octal
#
my $structure_header = {
        name            => [   0,   100,  "A100", "a100"],
        mode            => [ 100,     8,    "A8", "a8",  "%07o"],
        uid             => [ 108,     8,    "A8", "a8",  "%07o"],
        gid             => [ 116,     8,    "A8", "a8",  "%07o"],
        size            => [ 124,    12,   "A12", "a12", "%011o"],
        mtime           => [ 136,    12,   "A12", "a12", "%011o"],
        chksum          => [ 148,     8,    "A8", "A8",  "%06o\0 "],
        typeflag        => [ 156,     1,    "A1", "a1"],
        linkname        => [ 157,   100,  "A100", "a100"],
        magic           => [ 257,     6,    "A6", "A6"],
        version         => [ 263,     2,    "A2", "a2"],
        uname           => [ 265,    32,   "A32", "a32"],
        gname           => [ 297,    32,   "A32", "a32"],
        devmajor        => [ 329,     8,    "a8", "a8",  "%07o"],
        devminor        => [ 337,     8,    "a8", "a8",  "%07o"],
        prefix          => [ 345,   155,  "A155", "a155"],
        null            => [ 500,    12,   "x12", "x12"],
};

#   Initializes the structure and returns a reference to it.
#   Apart the structure above, the header line is stored in the
#   ``input'' hash reference and the list of keys in the structure is
#   stored in ``keys''
sub init_block {
        my ($input) = shift;
        my $block = {};
        my @keys  = ();
        my $unpack_format = '';
        my $pack_format   = '';

        #   keys sre sorted for user messages
        foreach (sort { $structure_header->{$a}->[0] <=> $structure_header->{$b}->[0] } keys %$structure_header) {
                $block->{$_} = {
                        'offset' => $structure_header->{$_}->[0],
                        'length' => $structure_header->{$_}->[1],
                        'unpack' => $structure_header->{$_}->[2],
                        'pack'   => $structure_header->{$_}->[3],
                        'octal'  => defined($structure_header->{$_}->[4]) ?
                                        $structure_header->{$_}->[4] : "",
                };
                push(@keys, $_);
                $unpack_format .= $block->{$_}->{unpack}." ";
                $pack_format   .= $block->{$_}->{pack}." ";
        }
        $block->{input}  = $input;
        $block->{keys}   = \@keys;
        $block->{unpack} = $unpack_format;
        $block->{pack}   = $pack_format;
        my (@entries) = unpack $unpack_format, $input;
        foreach (@keys) {
                $block->{$_}->{value} = shift @entries || "";
                $block->{$_}->{value} =~ s/\0+$//;
                $block->{$_}->{value} = oct($block->{$_}->{value})
                        if $block->{$_}->{octal} and $block->{$_}->{value} ne "";
        }

        #   Calculate chksum
        substr ($input, $block->{chksum}->{offset}, $block->{chksum}->{length})
                = " " x $block->{chksum}->{length};
        warn "Warning: checksum error with entry: $block->{name}->{value}\n"
                if unpack ("%16C*", $input) != $block->{chksum}->{value}
                   and $block->{typeflag}->{value} !~ m/[KL]/;

        #   Fix some special cases
        $block->{typeflag}->{value} = 0
                if $block->{typeflag}->{value} eq "";
        $block->{version}->{value} ||= " ";

        if ($block->{name}->{value} eq "././\@LongLink") {
                read($filein, $long_filename, $blocksize) == $blocksize
                        or die "error occurred when reading";
                read($filein, $header_line, $blocksize) == $blocksize
                        or die "error occurred when reading";
                msgdebug("Long filename: $long_filename");
                $block = init_block($header_line);
                $block->{name}->{value} = $long_filename;
                $block->{realname}      = $long_filename;
                $long_filename = "";
        }

        return $block;
}

#  Copy block
sub copy_block {
        my $block = shift;

        my $newblock = {};
        foreach (@{$block->{keys}}) {
                $newblock->{$_} = {};
                foreach my $field (qw(offset length unpack pack octal value)) {
                        $newblock->{$_}->{$field} = $block->{$_}->{$field};
                }
        }
        $newblock->{input}  = $block->{input};
        $newblock->{keys}   = $block->{keys};
        $newblock->{unpack} = $block->{unpack};
        $newblock->{pack}   = $block->{pack};
        return $newblock;
}

#   build the header line from the header structure
sub hash2string {
        my ($block) = shift;
        my ($name, $input, $chksum, $prefix);

        $prefix = '';
        $block->{realname} = $block->{name}->{value};
        $block->{realname} =~ s/\0+$//;

        #   Fix file name
        if (length($block->{name}->{value}) > $block->{name}->{length}) {
                my $newblock = copy_block($block);

                $block->{name}->{value} =~ s/\0+$//;
                $newblock->{size}->{value}  = length($block->{name}->{value})+1;
                $newblock->{name}->{value}  = "././\@LongLink";
                $newblock->{mode}->{value}  = 0;
                $newblock->{uid}->{value}   = 0;
                $newblock->{gid}->{value}   = 0;
                $newblock->{mtime}->{value} = 0;
                $newblock->{uname}->{value} = "root";
                $newblock->{gname}->{value} = "root";
                if ($block->{typeflag}->{value} == 0 ||
                    $block->{typeflag}->{value} == 5) {
                        $newblock->{typeflag}->{value} = "L";
                } else {
                        die "Fatal error: unknown format found with long filenames\n";
                }
                $prefix = hash2string($newblock) .
                        pack("a$blocksize", $block->{name}->{value});
                substr ($block->{name}->{value}, $block->{name}->{length}-1) = "";
                $total_size += 2 * $blocksize;
        }

        $input = ' ' x $blocksize;
        my @entries = ();
        foreach (@{$block->{keys}}) {
                if ($_ eq "chksum") {
                        push (@entries, "");
                } else {
                        if ($block->{$_}->{octal} && $block->{$_}->{value} ne "") {
                                push (@entries, sprintf("$block->{$_}->{octal}",
                                        $block->{$_}->{value}));
                        } else {
                                push (@entries, $block->{$_}->{value});
                        }
                }
        }
        #   calculate the checksum for this new string
        $input = pack($block->{pack}, @entries);
        $block->{chksum}->{value} = unpack("%16C*", $input);

        #   and store this checksum and the string
        substr ($input, $block->{chksum}->{offset}, $block->{chksum}->{length})
                = sprintf("$block->{chksum}->{octal}", $block->{chksum}->{value});
        $block->{input} = $input;

        return $prefix.$input;
}

#   print informations stored in the header
sub print_header {
        my ($block) = shift;

        foreach (@{$block->{keys}}) {
                msgdebug("$_: ".pack($block->{$_}->{pack},
                                        $block->{$_}->{value}));
        }
}

#   read contents of the file. In the archive, a file fills
#   entire blocks
sub read_contents {
        my ($header) = shift;
        my ($contents, $size);

        $size = $header->{size}->{value};
        if ($size % $blocksize != 0) {
                $size += $blocksize - ($size % $blocksize);
        }
        read($filein, $contents, $size) == $size
                || die "error occurred when reading";
        return $contents;
}

#   adds directory to the archive when a filename is inserted whereas
#   its parent directory is not.
sub add_directory {
        my $header = shift;
        my ($output, $filename, $dir, $header_dir);

        #   copy the header to a new block
        $header_dir = copy_block($header);

        #   change the type flag to tell this is a directory
        $header_dir->{typeflag}->{value} = 5;
        $header_dir->{size}->{value} = 0;
        $header_dir->{mode}->{value} = $dir_mode;

        $dir = $output = '';
        $filename = $header->{realname};
        $filename =~ s|/+[^/]*$||;
        foreach (split('/', $filename)) {
                $dir .= $_ . '/';
                $header_dir->{name}->{value} = $dir;
                if (not exists($list_dirs{$dir}) and
                    $dir ne $filename."/") {
                        $output .= hash2string($header_dir);
                        msgdebug("Adding directory $dir");
                }
                $list_dirs{$dir} = 1;
        }
        return $output;
}

sub msgdebug {
        my ($string) = @_;
        print STDERR "tarcust:debug: " . $string . "\n" if $opt_d == 1;
}

#   Apply changes requested by command-line arguments
#   Options are kept in a hash table.
sub change_values {
        my ($values, $block) = @_;

        return if $block->{name}->{value} !~ m{^$opt_a$};

        if ($values->{uname} ne '') {
                $block->{uname}->{value} = $values->{uname};
                $block->{uid}->{value}   = 0;
        }
        $block->{uid}->{value} = $values->{uid}
                if $values->{uid} ne '';

        if ($values->{gname} ne '') {
                $block->{gname}->{value} = $values->{gname};
                $block->{gid}->{value}   = 0;
        }
        $block->{gid}->{value} = $values->{gid}
                if $values->{gid} ne '';
}

sub msgverbose {
        my ($string) = @_;
        print STDERR "tarcust:verbose: " . $string . "\n" if $opt_v == 1;
}

sub showversion {
        print STDERR <<EOT;
tarcust 0.9.0 (16-Sep-2001)

Copyright (C) 1999-2001 Denis Barbier <barbier\@engelschall.com>
This program is a free software released under the GNU GPL License; see
the source for copying conditions.  There is NO warranty; not even for
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
EOT
        exit(0);
}

sub usage {
        print STDERR <<EOT;
Usage: tarcust [options] < input.tar > output.tar
Options:
   -h, --help                 show this help and exit
   -V, --version              show the version of tarcust and exit
   -v, --verbose              processes more verbosely
   -D, --force-directory      always create directory entry (deprecated,
                              this option is now always On)
   -u, --user-name=NAME       change owner of all files in the archive
       --owner=NAME           same as -u
   -U, --user-number=NUMBER   numerical id of user
   -g, --group-name=NAME      set the group name of all files in the archive
       --group=NAME           same -as -g
   -G, --group-number=NUMBER  numerical id of group
   -p, --prefix=STRING[:MODE] add a prefix directory name to every file
   -s, --transform-names=EXPR apply substitutions on file and directory names
   -x, --exclude=EXPR         delete file from the archive
   -a, --applies-to=EXPR      apply -u, -U, -g and -G to specific files only
   -e, --eval=EXPR            perform any Perl action on attributes
   -B, --begin-eval=EXPR      Perl expression evalled before reading input
   -E, --end-eval=EXPR        Perl expression evalled after writing output

EOT
        exit(1);
}

#   parse arguments
$opt_h = $opt_v = $opt_d = $opt_V = 0;
$opt_g = $opt_G = $opt_u = $opt_U = $opt_p = $opt_e = $opt_B = $opt_E = '';
@opt_x = @opt_s = ();
$opt_a = '.*';
$opt_D = 1;

$longnames = {
        user_name   => 'u',  owner           => 'u',
        group_name  => 'g',  group           => 'g',
        user_number => 'U',  group_number    => 'G',
        exclude     => 'x',  transform_names => 's',
        prefix      => 'p',  force_directory => 'D',
        applies_to  => 'a',  eval            => 'e',
        before_eval => 'B',  after_eval      => 'E',
};

#   I do not use getopt.pl or Getopt::Long because i want option
#   names to be compatible with tar and tardy. For instance,
#      -u <=> --owner <=> --user-name <=> --user_name
#
for (;;) {
        my ($option, $original_option, $argument);

        last if $#ARGV < 0;
        last if $ARGV[0] eq '-' or $ARGV[0] !~ m/^-/;

        if ($ARGV[0] eq '-h' || $ARGV[0] eq '-?' || $ARGV[0] eq '--help') {
                $opt_h = 1;
                last;
        }
        elsif ($ARGV[0] eq '-v' || $ARGV[0] =~ m/^--verb/ ) {
                $opt_v = 1;
        }
        elsif ($ARGV[0] eq '-d' || $ARGV[0] =~ m/--deb/ ) {
                $opt_d = 1;
        }
        elsif ($ARGV[0] eq '-V' || $ARGV[0] =~ m/^--vers/ ) {
                $opt_V = 1;
                last;
        }
        elsif ($ARGV[0] eq '-D' || $ARGV[0] =~ m/^--force-dir/ ) {
                $opt_D = 1;
        }
        elsif ($ARGV[0] =~ m/^-([gGuUxspaeBE])$/ ) {
                $option = $1;
                if ($#ARGV < 1) {
                        print STDERR "Error:tarcust: missing value to option: $ARGV[0]\n";
                        $opt_h = 1;
                        last;
                }
                shift(@ARGV);
                if ($option =~ m/^[xs]$/) {
                        eval "push(\@opt_$option, \$ARGV[0])";
                }
                else {
                        eval "\$opt_$option = \$ARGV[0]";
                }
        }
        elsif ($ARGV[0] =~ m/^--([^=]*)=(.*)$/) {
                $option = $original_option = $1;
                $argument = $2;
                $option =~ s/[^a-zA-Z0-9]/_/g;
                if (not defined($longnames->{$option})) {
                        print STDERR "Error:tarcust: unknown option: $original_option\n";
                        $opt_h = 1;
                        last;
                }
                if ($option =~ m/^exclude|transform_names$/) {
                        eval "push(\@opt_$longnames->{$option}, \$argument)";
                }
                else {
                        eval "\$opt_$longnames->{$option} = \$argument";
                }
        }
        else {
                print STDERR "Error:tarcust: unknown option: $ARGV[0]\n";
                $opt_h = 1;
                last;
        }
        shift(@ARGV);
}

usage() if $opt_h == 1;
showversion() if $opt_V == 1;

$filein  = *STDIN{IO};
$fileout = *STDOUT{IO};
if ($#ARGV == 0) {
        if ($ARGV[0] ne '-') {
                open($filein, "< $ARGV[0]");
        }
}
elsif ($#ARGV == 1) {
        if ($ARGV[0] ne '-') {
                open($filein, "< $ARGV[0]");
        }
        if ($ARGV[1] ne '-') {
                open($fileout, "> $ARGV[1]");
        }
}
elsif ($#ARGV > 0) {
        usage();
}

#   keep these values in a hash table
$new_values = {
        uname => $opt_u,
        uid   => $opt_U,
        gname => $opt_g,
        gid   => $opt_G,
};

#   --prefix is a synonym for --transform-names=s,^,PATH/,
#   A directory will be inserted at the beginning of the archive,
#   its mode is $dir_mode
$dir_mode = 040755;
if ($opt_p =~ m|^(.*):([0-9]+)$|) {
        $opt_p = $1;
        $dir_mode = $2;
        $dir_mode += 40000 if $dir_mode =~ m|^\d{1,4}$|;
        $dir_mode = oct($dir_mode);
}
push(@opt_s, "s,^,$opt_p/,") if $opt_p ne '';

#   Trap syntax error in -s flag
my $eval_opt_s = '1';
foreach (@opt_s) {
        $eval_opt_s .= "; $_";
}
my $sub_opt_s = eval "sub { my \$ref = shift; \$_ = \$\$ref; $eval_opt_s; die \$@ if \$@; \$\$ref = \$_; return 0}";
die "Invalid -s option" if $@;

#   Build a subroutine to remove files specified with -x flag
my $eval_opt_x = '';
foreach (@opt_x) {
        $eval_opt_x .= "return 1 if m|^$_\$|;";
}
my $sub_opt_x = eval "sub {\$_ = shift; $eval_opt_x; return 0}";
die "Invalid -x option" if $@;

#   main loop
$total_size = 0;
if ($opt_B) {
        eval "{$opt_B}; 1" or die "Expression \`".$opt_B."' invalid in -B flag:\n$@Exiting.\n";
}

while ( read($filein, $header_line, $blocksize) == $blocksize) {
        if ($opt_p ne '') {
                #   A prefix is required, so a directory must be added in
                #   the archive.
                #   Do not forget to apply changes
                $header = init_block($header_line);
                change_values($new_values, $header);

                my $dirs = '';
                foreach (split('/', $opt_p)) {
                        $dirs .= $_ . "/";
                        if (not exists($list_dirs{$dirs})) {
                                #   typeflag=5 for a directory
                                $header->{typeflag}->{value} = 5;
                                $header->{name}->{value} = $dirs;
                                $header->{size}->{value} = 0;
                                $header->{mode}->{value} = $dir_mode;
                                print_header($header);
                                print $fileout hash2string($header);
                                $list_dirs{$header->{realname}} = 1;
                                $total_size += $blocksize;
                                msgverbose("  Write: $header->{'name'}->{'value'}");
                        }
                }
                #   clear $opt_p so that this test is wrong next time
                $opt_p = '';
        }
        last if $header_line eq "\0" x $blocksize;

        $header = init_block($header_line);
        last if $header->{name}->{value} eq '';
        if ($header->{typeflag}->{value} !~ m/^[0-6]?$/) {
                #   this file is not a regular file
                $contents = read_contents($header);
                print STDERR "Warning:tarcust: Don't know how to handle GNU extensions\n";
                print STDERR "                 $header->{'name'}->{'value'} unchanged\n";
                print $fileout hash2string($header);
                print $fileout $contents;
                next;
        }

        msgverbose("File " .  $header->{name}->{value});
        print_header($header);

        change_values($new_values, $header);

        $contents = read_contents($header);

        #   check if this file has been excluded by the -x flag
        if (&$sub_opt_x($header->{'name'}->{'value'})) {
                msgverbose("  Skipped");
                next;
        }
        &$sub_opt_s(\$header->{'name'}->{'value'});
        if ($opt_e) {
                my %F = ();
                foreach (keys %$structure_header) {
                        $F{$_} = $header->{$_}->{value};
                }
                eval "{$opt_e}; 1" or die "Expression \`".$opt_e."' invalid in -e flag:\n$@Exiting.\n";
                foreach (keys %$structure_header) {
                        $header->{$_}->{value} = $F{$_};
                }
        }
        print_header($header);
        $header->{input} = hash2string($header);
        $header_line = add_directory($header);
        print $fileout $header_line if $header_line ne '';
        print $fileout $header->{input};
        print $fileout $contents;
        $list_dirs{$header->{realname}} = 1
                if $header->{typeflag}->{value} == 5;

        $total_size += $blocksize + length($contents);
        msgverbose("  Write: $header->{'name'}->{'value'}");
}

#   Flushes input to avoid the ``broken pipe'' message
undef $/;
$_ = <$filein>;

#   And writes null chars.
print $fileout "\0" x ($blocksize);
$total_size += $blocksize;

#   In GNU tar, the total size is a multiple of 20 512-bytes blocks
if ($total_size % (20*$blocksize) != 0 ) {
        print $fileout "\0" x (20*$blocksize - ($total_size % (20*$blocksize)));
}

if ($opt_E) {
        eval "{$opt_E}; 1" or die "Expression \`".$opt_E."' invalid in -E flag:\n$@Exiting.\n";
}
exit(0);

##EOF##
