#!/usr/bin/perl -w

use strict;
use integer;

# Perl 5.26+ no longer searches the current directory for modules, yet
# extractFiles() unpacks KATzip.pm there.  Append '.' (its historical place
# at the end of @INC) so an installed KATzip still takes precedence.
(grep { $_ eq '.' } @INC) or push(@INC, '.');

# First run (or a KATzip older than 1.02): unpack the files bundled after
# __END__.  The require then fails loudly if the module is still unusable.
eval('use KATzip 1.02; 1') or extractFiles();
require KATzip;

# Shared with zipit()/unzipit(), which read further arguments from @ARGV.
my $filename = shift;
my $user = 'user';
my $passkey = '';

defined($filename)
    or die("usage: perl kat.pl filename [keyset [passkey]]\n"
         . "       perl kat.pl filename.kz [passkey]\n");

# Only a literal ".kz" extension selects decryption; the dot is escaped so
# that names merely ending in "kz" are encrypted like any other file.
if ($filename =~ m/\.kz$/) { unzipit(); }
else { zipit(); }
exit(0);


# Encrypt the file named by the file-level $filename into "$filename.kz".
#
# Remaining command-line arguments are consumed from @ARGV: first the keyset
# name (stored in $user, a trailing ".key" is dropped), then the passkey.
# The output file is a header line "<length> <keyset>\n" followed by the
# encrypted bytes returned by KATzip::kat_encrypt().
#
# Declared without a prototype: the sub is called before this definition is
# compiled, which under -w draws "called too early to check prototype".
sub zipit {
    my $data;

    (@ARGV) and ($user = shift(@ARGV));
    (@ARGV) and ($passkey = shift(@ARGV));
    # Match a literal ".key" only; an unescaped dot would also truncate
    # keyset names such as "monkey".
    $user =~ s/\.key$//;

    open(DAT, "<$filename") or die("Can't open $filename: $!");
    binmode(DAT);
    my $len = (-s DAT) || 0;    # -s is false, not 0, for an empty file
    defined(read(DAT, $data, $len)) or die("Can't read $filename: $!");
    close(DAT);

    my $zref = KATzip::kat_encrypt($user, $passkey, \$data);
    print("$filename ($len) -> $filename.kz (", length($$zref), ")\n");

    open(ZIP, ">$filename.kz") or die("Can't open $filename.kz: $!");
    binmode(ZIP);
    print ZIP (length($$zref), " $user\n", $$zref)
        or die("Can't write $filename.kz: $!");
    # Buffered write errors (e.g. disk full) only surface at close.
    close(ZIP) or die("Can't write $filename.kz: $!");
}

# Decrypt the .kz file named by the file-level $filename.
#
# An optional passkey is taken from @ARGV.  The first line of the file is a
# header "<length> <keyset>"; <length> bytes of encrypted data follow.  The
# keyset name from the header replaces $user, and the result is written to
# $filename with its three-character suffix removed.
#
# Declared without a prototype: the sub is called before this definition is
# compiled, which under -w draws "called too early to check prototype".
sub unzipit {
    my $hdr;
    my $zdata;

    (@ARGV) and ($passkey = shift(@ARGV));
    open(ZIP, "<$filename") or die("Can't open $filename: $!");
    binmode(ZIP);
    $hdr = <ZIP>;
    # $filename still carries its suffix here, so name it as-is.
    (defined($hdr) and $hdr =~ m/^(\d+)\s(\S+)/)
        or die("?Bad header in $filename");
    my $len = $1;
    $user = $2;
    my $got = read(ZIP, $zdata, $len);
    close(ZIP);
    # Report a short file as such instead of as a wrong keyset/passkey.
    (defined($got) and $got == $len)
        or die("?$filename is truncated or unreadable");

    # Drop the suffix that routed the file here to get the output name.
    $filename =~ s/.kz$//;

    print("[$user]  $filename\n");
    my $data = KATzip::kat_decrypt($user, $passkey, \$zdata);

    # Test definedness, not truth: an empty file, or one holding just "0",
    # decrypts to a false but perfectly valid value.
    defined($$data)
        or die("Can't unzip $filename; probably wrong keyset/passkey");
    open(DAT, ">$filename") or die("Can't open $filename: $!");
    binmode(DAT);
    print DAT ($$data) or die("Can't write $filename: $!");
    close(DAT) or die("Can't write $filename: $!");
}

# Unpack the files bundled after __END__ into the current directory.
#
# A line of the form ">>name" starts a new output file; every following line
# is copied to it, with a leading "p=" rewritten to "=" (the bundled POD is
# stored that way so it is not parsed as part of this script).  Lines before
# the first marker are ignored.
#
# Declared without a prototype: the sub is called before this definition is
# compiled, which under -w draws "called too early to check prototype".
sub extractFiles {
    my $file = '';

    while (<DATA>) {
        if (m/^>>(\S+)/) {
            my $name = $1;
            # Finish the previous file explicitly so write errors are seen.
            ($file ne '') and (close(OUT) or die("?Can't write $file: $!"));
            $file = $name;
            printf("Extracting %s\n", $file);
            open(OUT, ">$file") or die("?Can't create $file: $!");
            next;
        }
        s/^p=/=/;
        ($file ne '') and (print OUT ($_) or die("?Can't write $file: $!"));
    }
    # Only close a handle that was actually opened.
    ($file ne '') and (close(OUT) or die("?Can't write $file: $!"));
}

=head1 NAME

KAT.PL - File encryption (with compression).

=head1 SYNOPSIS

    encryption - perl kat.pl filename keyset [passkey]

    decryption - perl kat.pl filename.kz [passkey]

=head1 DESCRIPTION

This program encrypts/decrypts a file using a key selected from a keyset file.
This file will be compressed before encryption
and decompressed after decryption.
When encrypting a file, the output file name will be F<filename> with
a B<.kz> extension added.
If F<filename> has an extension B<.kz>, the file will be decrypted.

The F<keyset> file must have an extension of B<.key>.
It is not necessary to enter the extension (.key) on the command line.
A passkey can be used to scramble the key selector,
effectively password protecting the keyset file.
F<keyset> will default to 'F<user.key>' (if not using a passkey).
A keyset file can be used to produce a large number of keys (2**64).
A key will be randomly selected from the keyset for each file encrypted.
Generally, a key will only be used once.
A key will be the same size as the file being encrypted.

=head1 SETUP

The F<kat.pl> program file consists of three files concatenated together.
The files will be automatically extracted the first time the program is run.
The two additional files are:

=over 4

=item *

KATzip.pm - module with the encryption and decryption functions.

=item *

bits.pl - a program for testing purposes.

=back

Keyset files must be generated.  A keyset is simply a file, but the data
in the file should be as random as possible.  The degree of randomness 
within the keyset file data determines how effective the KAT encryption
will be.

Select a file to use as a temporary keyset.  It should be a binary file
(text files are a bad choice), preferably a compressed file or an
encrypted file.  Make a copy of it naming it with an extension of B<.key>.
Then select another file and encrypt it with the temporary keyset.

Repeat this process several times using the encrypted file produced as
a keyset to encrypt another file to produce a new keyset.
A properly created keyset will contain data that will be indistinguishable
from truly random data.  A keyset file must be at least 10000 bytes.
The F<bits.pl> program can be used to check the randomness of
the data in your keyset.

See the documentation in the F<KATzip.pm> module for an explanation
of how keysets are used by the encryption/decryption functions.

=head1 PREREQUISITES

This script requires that the C<Compress::Zlib> module be installed.
It also uses the C<strict> and C<integer> modules.

=head1 LIMITATIONS

This implementation reads an entire file into memory and encrypts it
as a single block.  That imposes limits on the maximum file size which
can be encrypted.  Also, the processing overhead of the gzip functions
seems to increase significantly with larger blocks.  This is not a
problem for experimental purposes.  For a practical encryption system,
the design would have to be enhanced to divide files into blocks and
encrypt each block separately.

=head1 WARNING

This encryption system is intended for B<EXPERIMENTAL> purposes only!

=head1 AUTHOR

    B. Yarber.
    yarber@paperweb.ws

Comments, criticisms, and questions are welcomed.

=cut

__END__
>>KATzip.pm
#!/usr/bin/perl -w

# Package setup for KATzip: exports, version and compile-time pragmas.
package KATzip;
require 5.005;
require Exporter;
# These package variables are assigned above "use strict", so they need no
# declaration.
@ISA = ('Exporter');
@EXPORT = qw(kat_encrypt kat_decrypt);      # exported by default
@EXPORT_OK = qw(scramble);                  # exported only on request
$VERSION = 1.02;                            # kat.pl asks for "KATzip 1.02"

use Compress::Zlib(qw/compress uncompress/);
use strict;
use integer;

# Forward declaration: kat_encrypt/kat_decrypt call applyKeys before its
# definition, and need the prototype so that their last argument is passed
# by reference (the \$ slot).
sub applyKeys($$$\$);

# Compress a block of data and XOR it with keys taken from "$user.key".
#
#   arg 1 - keyset name (without the .key extension)
#   arg 2 - passkey, or a false value for none
#   arg 3 - the data; received here as a reference to the caller's scalar
#
# Returns a reference to the encrypted string.  The key selector picked by
# selectKeys() is appended to it, which is where kat_decrypt() finds it.
sub kat_encrypt($$\$) {
    my ($keyset_name, $passkey, $plain_ref) = @_;

    my $packed = compress($plain_ref);
    my $selector = selectKeys($keyset_name);

    # applyKeys' prototype takes $packed by reference and XORs it in place.
    applyKeys($keyset_name, $passkey, $selector, $packed);

    $packed .= $selector;
    return(\$packed);
}

# Reverse kat_encrypt: strip the key selector, undo the XOR and expand.
#
#   arg 1 - keyset name (without the .key extension)
#   arg 2 - passkey, or a false value for none
#   arg 3 - the encrypted data; received here as a reference to the caller's
#           scalar.  The 8-byte selector is removed from it and the rest is
#           decrypted in place.
#
# Returns a reference to the expanded data.  The referenced value is undef
# when the input cannot be decoded: uncompress() failed (typically a wrong
# keyset or passkey) or the input is too short to hold a selector.
sub kat_decrypt($$\$) {
    my $user = shift;
    my $passkey = shift;
    my $zdata = shift;
    my $data;

    # Input without room for the 8-byte selector cannot have come from
    # kat_encrypt; fail the same way a bad uncompress does instead of
    # running substr() off the front of the string.
    (length($$zdata) >= 8) or return(\$data);
    my $keys = substr($$zdata, -8, 8, '');

    applyKeys($user, $passkey, $keys, $$zdata);
    $data = uncompress($zdata);
    return(\$data);
}

# XOR the data in place with the key strings chosen by a key selector.
#
#   arg 1 - keyset name; readKeys() opens "$user.key"
#   arg 2 - passkey; when true the selector is scrambled with it first
#   arg 3 - packed selector (kat_encrypt passes the value from selectKeys,
#           kat_decrypt the last 8 bytes of its input)
#   arg 4 - the data; the \$ in the prototype makes this a reference to the
#           caller's scalar, which is modified in place
#
# Both kat_encrypt and kat_decrypt call this routine: applying the same XOR
# a second time restores the original bytes.
sub applyKeys($$$\$) {
    my $user = shift;
    my $passkey = shift;
    my $keys = shift;
    my $data = shift;
    my $len = length($$data);
    # 64 selector bits x 128 bytes per bit = 8192 possible start offsets, so
    # read that much extra to let every offset supply $len bytes.
    my $keyset = readKeys($user, $len + (64 * 128));

    # Expand the packed selector into a string of '0'/'1' characters.
    $keys = unpack("B64", $keys);
    # With a passkey: scramble passkey+selector, take the bits of the first
    # 64 bytes of the result and keep every 8th one (each byte's low bit).
    ($passkey) and
        ($keys = unpack("B512", scramble($passkey . $keys))) =~ s/.{7}(.)/$1/g;
    # Seven copies of the selector, reversed; read 7 bits at a time below.
    my $k1 = reverse($keys x 7);
    my $x1 = -128;
    foreach (split(//, $keys)) {
        $x1 += 128;             # each selector bit owns a 128-byte window
        ($_) or next;           # '0' bit: this window contributes nothing
        $k1 =~ m/(.{7})/gc;     # /g resumes after the previous 7-bit group
        my $k7 = ord(pack("B8", "0$1"));    # 7 bits -> offset 0..127
        my $key = substr($$keyset, $x1 + $k7, $len);
        $$data ^= $key;
    }
}

# Read key material from the keyset file "$user.key".
#
#   arg 1 - keyset name (without the .key extension)
#   arg 2 - number of bytes wanted
#
# The first 100 bytes of the file are skipped.  If the file holds fewer
# bytes than requested, what was read is repeated until the length is
# reached.  Returns a reference to the string; dies if the file cannot be
# opened or read, or is smaller than 10000 bytes.
sub readKeys($$) {
    my $user = shift;
    my $len = shift;
    my $key;

    open(KEY, "<$user.key") or die("?Can't open $user.key: $!");
    binmode(KEY);
    # The documentation promises that 10000 bytes is enough, so a file of
    # exactly that size must be accepted.
    ((-s KEY) >= 10000) or die("?$user.key file too small");
    seek(KEY, 100, 0);
    read(KEY, $key, $len);
    close(KEY);
    (defined($key) and length($key) > 0) or die("?Can't read $user.key");
    # Extend by copying from the start; each pass at most doubles the length.
    while (length($key) < $len) {
        my $x1 = $len - length($key);
        $key .= substr($key, 0, $x1);
    }
    return(\$key);
}

# Build a random 8-byte key selector from the keyset "$user.key".
#
# Each of the first 100 eight-byte groups returned by readKeys() is XORed
# into the selector with probability 1/2.  Dies if the result is all zero
# bytes, since such a selector would select no key strings at all.
sub selectKeys($) {
    my $user = shift;
    # Start from eight NUL bytes.  (0x0 x 8 would repeat the *number* zero,
    # giving the ASCII text "00000000" and defeating the null check below.)
    my $key = "\0" x 8;
    my $ptr = readKeys($user, 8 * 100);

    foreach (0..99) {
        (int(rand(2)) == 1) or next;
        $key ^= substr($$ptr, $_ * 8, 8);
    }
    ($key eq ("\0" x 8)) and die("?Null key selected");
    return($key);
}

# Produce a signature string for a block of text.
#
# The text is mixed with four copies of a 62-character alphanumeric basket
# (plus '<' and '>' markers) and the mix is shuffled for seven passes; each
# step swaps two positions and advances the swap offset by the character
# code just moved.  Everything that is not a letter or digit is then removed
# and the remaining string is returned.
sub scramble($) {
    my $data = shift;
    my $basket = '<abcdefghijklmnopqrstuvwxyz'
               . 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789>';

    # Move the caller's letters and digits into the high-bit range.
    (my $marked = $data) =~ tr/a-zA-Z0-9/\xe1-\xfa\xc1-\xda\xb0-\xb9/;

    my @eggs = split(//, $marked . ($basket x 4));
    my $count = scalar(@eggs);
    my $offset = 19;

    for my $pass (1..7) {
        for my $i (0..($count - 1)) {
            my $val = $eggs[$i];
            @eggs[$i, $offset] = @eggs[$offset, $i];
            $offset += (ord($val) % ($count - 1));
            if ($offset >= $count) { $offset -= $count; }
        }
    }

    my $mix = join('', @eggs);
    $mix =~ s/[\W_]//g;
    return($mix);
}

1;

__END__

p=head1 NAME

KATzip.pm - Kat encryption/decryption functions.

p=head1 SYNOPSIS

  use KATzip;
  $ptr = kat_encrypt($keyset, $passkey, $data);
  $ptr = kat_decrypt($keyset, $passkey, $data);
  $signature = KATzip::scramble($data);

p=head1 DESCRIPTION

kat_encrypt - compresses and encrypts data
using a randomly selected key from the keyset file.

kat_decrypt - decrypts and expands data using the key from the keyset file.

scramble - generates a digital signature for a block of text.

The keyset file is used to provide a large number of keys (2**64)
for encryption.  As implemented, the Kat algorithm will randomly select
a key from the keyset.  The chance of a key being used more than once
is very small.  To prevent any chance of a key being used twice,
the algorithm would have to be enhanced to keep track of the keys used.
A passkey can be used to scramble the keyset selector to effectively
password protect the keyset.

p=head2 Key Characteristics

p=over 4

p=item *

A key will be the same length as the data (after compression) being encrypted.

p=item *

The creation of a key is not related in any way to the data being encrypted.

p=item *

Each key is a random string of bits.
(No part of a key is a function of any other part of the key).

p=item *

Approximately half the bits in a key will be zeros (half ones).

p=item *

When comparing any two keys (of equal length),
approximately half the bits will be different.

p=back

p=head2 Key Generation and Use

The keyset file is used for creating keys for encryption.  
It should be at least 10000 bytes in length.  See the documentation in
F<kat.pl> for suggestions for creating a keyset file.
The keyset is viewed as an array of bytes.
Creating a key involves selecting several strings of bytes from the array.
Each string is copied from a different offset from the beginning
of the array.  (The strings may overlap).

First, a key selector is generated.  It is simply a 64-bit random number.
The selector is used to determine which strings to copy from the keyset
array.  The selector will specify from 1 to 64 offsets (typically about 32)
into the array from a total of 8192 possible offsets.
For each offset, a string of bytes is copied equal in length to the data
being encrypted.  If the keyset file is smaller than the data,
the string is concatenated to itself one or more times
producing a string of the needed length.  The string is then
exclusive or'ed to the data string.  This is repeated for each
offset specified by the selector.

Encryption is simply eoring data with a key: 

  $data ^= $key;

If the key is a string of random bits, about half the bits in the data
string will be complemented.  For each bit position in the key containing
a '1', the bit in the corresponding position of the data is complemented
(a '1' changes to '0', or a '0' changes to '1').  All the strings selected
from the keyset can be eored together to produce a key which is eored
to the data, or each of the selected strings can be eored to the data
separately; the result is the same.

The objective of the key selection process is to produce a string of
random numbers, each either a '0' or a '1'.  The number of '1' bits in the
key selector (which is also a random number) determines the number of
strings selected from the keyset.  Each corresponding bit position
among the strings forms a group of '0's and '1's.  Eoring these bits
together will produce a result based on the number of '1's being even (a '0')
or odd (a '1').  This is intended to simulate a series of random events
such as flipping a coin the number of times determined by the key selector
and counting whether the number of times heads comes up is even or odd.

p=head2 Digital signatures

This B<scramble> function produces a digital signature for a block of data.
It starts with a set of 62 eggs consisting of letters A thru Z,
upper and lower case, and digits 0 thru 9.
The data is mixed with 4 sets of eggs and then scrambled (irreversibly).
The data is then removed from the mix
and the scrambled eggs are returned to the caller.
For the technical details of the algorithm, read the code.
It's really quite trivial.
(Note: this function has not been tested with unicode data).

This algorithm developed from a need for the use of digital signatures
in client/server applications where the server is developed in perl
and the client in javascript (running in a browser).
The objective was to have an algorithm which could be easily implemented
in both languages and produce identical results.
The following is the algorithm implemented in javascript.
(Note: a few of the functions used are not standard ECMAscript.
Sorry 'bout that, Chief...).
  

  function scramble(data) {
      var basket = '<abcdefghijklmnopqrstuvwxyz'
                 + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789>';
      var offset = 19;
      var len, val, x1, x2;

      var eggs = data.split('');
      var x1 = basket + basket + basket + basket;
      for (x2 = 0; x2 < eggs.length; ++x2)
          if (eggs[x2].match(/^([a-zA-Z0-9])/))
              eggs[x2] = String.fromCharCode(
                   eggs[x2].charCodeAt(0) + 128);
      eggs = eggs.concat(x1.split(''));
      len = eggs.length;
      for (x1 = 0; x1 < 7; ++x1) {
          for (x2 = 0; x2 < len; ++x2) {
              val = eggs[x2];
              eggs[x2] = eggs[offset];
              eggs[offset] = val;
              offset += (val.charCodeAt(0) % (len-1));
              (offset >= len) && (offset -= len);
          }
      }
      eggs = eggs.join('');
      return(eggs.replace(/[\W_]/g, ''));
  }


p=head1 WARNING

The level of encryption security that the Kat algorithm might provide
has not been determined.  It must be considered to be absolutely
completely and unconditionally B<EXPERIMENTAL>.  Any other use
would be foolish.  Consider carefully:

  Theorem:  I could be mistaken.
    Proof:  I think, therefore, I err.

p=head1 AUTHOR

  B. Yarber.
  yarber@paperweb.ws

p=cut
>>bits.pl
#!/usr/bin/perl -w

use strict;
use integer;

# Shared with the subs below: the file currently being examined and its data.
my $filename = shift;
my $data;

defined($filename)
    or die("usage: perl bits.pl filename [filename2]\n"
         . "       perl bits.pl keyset length\n");

# "keyset length": a numeric second argument selects the keyset statistics.
if (@ARGV and $ARGV[0] =~ /^\d+$/) {
    examineKeys(shift);
    exit(0);
}
readData();
# "filename filename2": count the bits that differ between the two files.
if (@ARGV) {
    $filename = shift;
    compareFile();
}
countBits();
countRepeats();
exit(0);

# Load the file named by the file-level $filename into the file-level $data.
#
# At most 10000000 bytes are read.  A leading line made up only of bytes
# \x0d-\x7e (such as the header kat.pl writes) is removed.
#
# Declared without a prototype: the sub is called before this definition is
# compiled, which under -w draws "called too early to check prototype".
sub readData {
    open(DAT, "<$filename") or die("Can't open $filename: $!");
    binmode(DAT);
    # limit large file size for reasonable mem use
    defined(sysread(DAT, $data, 10000000))
        or die("Can't read $filename: $!");
    $data =~ s/^[\x0d-\x7e]+\n//;  #remove header line (if any)
    close(DAT);
}

# XOR the data already loaded with the file now named by $filename.
#
# The result (a 1 bit wherever the two files differ) replaces the file-level
# $data and is also written to "kat.dif".  Dies if the two files, after
# readData() has removed any header line, are not the same length.
#
# Declared without a prototype: the sub is called before this definition is
# compiled, which under -w draws "called too early to check prototype".
sub compareFile {
    my $data2 = $data;

    readData();
    (length($data) == length($data2)) or die("File lengths are different");
    $data ^= $data2;
    open(DIF, ">kat.dif") or die("Can't open kat.dif: $!");
    binmode(DIF);
    defined(syswrite(DIF, $data)) or die("Can't write kat.dif: $!");
    close(DIF) or die("Can't write kat.dif: $!");
}

# Print the size of the file-level $data and how many of its bits are 1,
# together with the expected count (half of all bits) and the deviation.
#
# Declared without a prototype: the sub is called before this definition is
# compiled, which under -w draws "called too early to check prototype".
sub countBits {
    my $len = length($data);
    # The "%32b*" checksum adds up the individual bits, i.e. counts the
    # 1-bits, without building a bit string eight times the data size.
    my $total = unpack('%32b*', $data);
    my $avg = $len * 4;
    my $dif = $total - $avg;
    print("$len bytes, $total 1-bits ($avg + $dif)\n\n");
}

# Print a histogram of run lengths in the file-level $data: for every length
# that occurs, the number of runs of identical bits (all 0s or all 1s) of
# exactly that length.
#
# Declared without a prototype: the sub is called before this definition is
# compiled, which under -w draws "called too early to check prototype".
sub countRepeats {
    my @count;

    my $list = unpack("B*", $data);
    # Walk the runs with a plain match loop; nothing needs to be replaced.
    while ($list =~ m/(0+|1+)/g) {
        ++$count[length($1)];
    }

    foreach (1..$#count) {
        ($count[$_]) or next;
        printf("%4d %8d\n", $_, $count[$_]);
    }
}

# Print 1-bit statistics for the key strings of a keyset.
#
#   arg 1 - length in bytes of the strings to examine
#
# The keyset "$filename.key" is read through KATzip::readKeys().  For each of
# the 8192 start offsets the string of that length is XORed into a running
# accumulator and the accumulator's 1-bits are counted.  The average, the
# minimum and maximum (with their distance from half of all bits), the
# standard deviation, and how many counts fall within 1, 2, 3 and more
# standard deviations of the average are printed.
#
# Declared without a prototype: the sub is called before this definition is
# compiled, which under -w draws "called too early to check prototype".
sub examineKeys {
    # KATzip.pm is unpacked next to this script, but Perl 5.26+ no longer
    # searches the current directory; append it as older perls did.
    (grep { $_ eq '.' } @INC) or push(@INC, '.');
    require KATzip;
    my ($len, $len1) = shift;
    my $key = KATzip::readKeys($filename, $len + 8192);
    my ($keyx, $key1) = ("\0" x $len);
    my @sum;
    my $total = 0;
    my $avg = $len * 4;         # expected 1-bits: half of $len * 8
    my $mincount = 999999999;
    my $maxcount = 0;

    # The averages and the standard deviation below need fractions.
    no integer;
    foreach (0..8191) {
        $keyx ^= substr($$key, $_, $len);
        $key1 = unpack('B*', $keyx);
        $key1 =~ tr/0//d;       # only the '1' characters remain
        $sum[$_] = $len1 = length($key1);
        $total += $len1;
        ($len1 < $mincount) and ($mincount = $len1);
        ($len1 > $maxcount) and ($maxcount = $len1);
    }
    print("average: ", $total / 8192, "\n");
    print("min $mincount (", $mincount - $avg,
        ")   max $maxcount (", $maxcount - $avg, ")\n");
    # From here on $avg is the measured mean and $total the deviation sum.
    $avg = $total / 8192;
    $total = 0;
    foreach (@sum) { $total += ($_ - $avg) ** 2 }
    $total = sqrt($total / 8192);
    print("sd = $total\n");
    my ($x1, $x2, $x3) = (0, 0, 0);
    foreach (@sum) {
        my $dif = abs($_ - $avg);
        if ($dif < $total) { $x1 += 1 }
        elsif ($dif < ($total * 2)) { $x2 += 1 }
        elsif ($dif < ($total * 3)) { $x3 += 1 }
    }
    printf("%6d < 1 sd\n%6d < 2 sd\n%6d < 3 sd\n%6d > 3 sd\n",
        $x1, $x2, $x3, 8192 - ($x1 + $x2 + $x3));
}

__END__

p=head1 NAME

bits.pl - count bits in a file.

p=head1 SYNOPSIS

    perl bits.pl filename
    perl bits.pl filename filename2
    perl bits.pl keyset length

p=head1 DESCRIPTION

This program will count bits in a file.
First, it counts the number of '1' bits.
Then it counts the number of repeating bit strings producing
separate counts based on string lengths.

If two files are specified, the files are compared to produce a
string indicating which bits are different between the files.
The counts are then done for this difference.

When given a keyset name and a length, it will count the '1' bits
for all the possible strings (8192) for producing keys
(of the specified length).  It then lists the average, minimum
and maximum counts, and standard deviation.

p=head1 AUTHOR

    B. Yarber.
    yarber@paperweb.ws

p=cut
