#!/usr/bin/perl
;############ MODIFY THIS LINE WITH YOUR PERL LOCATION ############
push(@INC,"/usr/lib/perl5");
require("flush.pl");

# Pin both locale variables to "C" before any external command is run.
@ENV{"LANG", "LC_ALL"} = ("C", "C");


;##################################################################
;######## LINUX DISASSEMBLER 2.0799
;######## (C) SiuL+Hacky Jul 1999
;######## You may copy, modify, distribute this program and
;######## is up you to keep this header here
;######## Usage: dasm exe_file dasm_file
;##################################################################

#
# Unhandled opcode (and it is unclear what it is):
# 804a2ff:       8d 14 85 00 00 00 00    lea    0x0(,%eax,4),%edx
#                                        lea    (%eax,4),%edx



# ---- Step 1: produce the raw objdump listing ---------------------------
# Command line: dasm exe_file dasm_file.  The objdump output is written to
# a scratch file named "<dasm_file>2", which the later steps read.
if (@ARGV < 2){
  # Without both arguments the command below would redirect into a file
  # literally named "2", so stop early with the usage text instead.
  print "\nUsage: dasm exe_file dasm_file\n";
  exit(1);
}
$f_input=$ARGV[0];
$f_output=$ARGV[1];
&printflush(STDOUT, "\nCreating disassembled file ...\n");
# The command goes through the shell because of the ">" redirection, so
# both paths are single-quoted (embedded quotes escaped) to keep spaces and
# shell metacharacters in file names from being interpreted.
my $quoted_input=$f_input;
$quoted_input=~s/'/'\\''/g;
my $quoted_dump=$f_output."2";
$quoted_dump=~s/'/'\\''/g;
$return=system("objdump -d -T -x --prefix-addresses '".$quoted_input."' > '".$quoted_dump."'");
if ($return!=0){
  # The redirection creates the scratch file even when objdump fails.
  unlink($f_output."2");
  print "\nERROR OPENING OBJDUMP $return";
  print "\nUsage: dasm exe_file dasm_file";
  print "\nBe sure to get objdump in your path. Check also file
  permissions\n"; exit(1);
 }

# ---- Step 2: load the .rodata section of the executable ----------------
# Opens the objdump listing (left open as INPUT for the later steps), finds
# the .rodata row of the section table and reads that many bytes of the
# executable into $cadena.  Sets $size, $starting_address, $end_address and
# $offset.  On exit $_ holds the .rodata row and INPUT is positioned right
# after it.
open(INPUT, "<", $f_output."2") || die "\nCannot open ".$f_output."2: $!\n";

&printflush(STDOUT, "\nReading strings ...");
# The row carries four hex columns after the section name; the first,
# second and fourth are used as size, start address and file offset.
# Reading stops at end of file, so a listing without .rodata can no longer
# spin forever, and the columns are matched as whole hex numbers instead of
# being cut at the first "0" character.
my $rodata_found=0;
while (defined($_=<INPUT>)){
  if (/\.rodata\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)/){
    $size=hex($1);
    $starting_address=hex($2);
    $offset=hex($4);
    $rodata_found=1;
    last;
  }
}
die "\nNo .rodata section found in the objdump output\n" if (!$rodata_found);
$end_address=$starting_address+$size;

# Pull the raw section bytes out of the executable itself.
open(CODIGO, "<", $f_input) || die "\nCannot open ".$f_input.": $!\n";
binmode(CODIGO);
seek(CODIGO,$offset,0);
read(CODIGO,$cadena,$size);
close(CODIGO);


# ---- Step 3: collect symbol names --------------------------------------
# Skips forward to the first line mentioning "SYMBOL TABLE".  Every
# following line, up to the first empty one, is split on single spaces:
# the first field becomes the key and the last field the value of
# %symbol_table.  If no such header exists the listing is rewound.
SEARCH: while (<INPUT>){
  last SEARCH if (/SYMBOL TABLE/);
}
if (defined($_) && /SYMBOL TABLE/){
  &printflush(STDOUT, "\nProcessing symbol table ...");
  # Also stop at end of file, so a listing that ends inside the table
  # cannot hang the script.
  while (defined($_=<INPUT>) && !/^\n/){
   @st_element=split(/ /, $_);
   my $symbol_name=$st_element[$#st_element];
   # chomp only removes a line terminator; chop would eat a real character
   # from an unterminated last line.
   chomp($symbol_name);
   $symbol_table{$st_element[0]}=$symbol_name;
  }
}
else {
  seek(INPUT,0,0);
}

# ---- Step 4: record jump and call targets ------------------------------
# Advances to the first line mentioning ".text", then scans every remaining
# line.  For a line containing "j", or a "call" with no "%" after it, the
# third space-separated field is taken as the target: the referencing
# address is appended to %salto (jumps) or %call (calls), and the target
# gets a "LabelN" name in %labels the first time it is seen.
until (defined($_) && /\.text/){
  $_=<INPUT>;
  # End of file without a match used to loop forever.
  die "\nNo .text section found in the objdump output\n" if (!defined($_));
}
&printflush(STDOUT,  "\nProcessing jmps and calls ...");

$count=0;
while (<INPUT>){
  # Same normalisation as the output pass: drop "0x" and leading zeros in
  # front of decimal digits, remove <symbol+offset> annotations, and
  # collapse pairs of spaces.
  s/0x0*([0-9]+)/$1/g;
  s/<.*?>//g;
  s/  / /g;

  my $references;
  if (/j/){
    $references=\%salto;
  }
  elsif (/call[^%]*$/){
    $references=\%call;
  }
  else {
    next;
  }

  ($direccion,$inst,$destino)=split(/ /,$_,3);
  $destino=~s/ //g;
  chomp($destino);
  $references->{$destino}.=($direccion." \; ");
  $labels{$destino}=("Label".$count++) if not exists $labels{$destino};
}




# ---- Step 5: guess the address of main ---------------------------------
# Rewinds the listing, skips to the .text disassembly and remembers the
# operand of the most recent "push $0x..." until the first "call 0x..."
# line; that operand is then labelled "main".
# NOTE(review): the address is stored as written (leading zeros kept),
# while the other passes strip them - confirm the keys line up.
seek(INPUT,0,0);
&printflush(STDOUT, "\nSearching for main function ...\n");
until (defined($_) && /Disassembly of section \.text:/){
	$_=<INPUT>;
	# End of file without the header used to loop forever.
	die "\nDisassembly of section .text not found\n" if (!defined($_));
}
$found = 0;
$addr = "";
while ( <INPUT> ) {
	# Capture only the hex digits so trailing text cannot end up in the key.
	$addr = $1 if ( /push +\$0x([0-9a-fA-F]+)/ );
	if ( /call +0x.*/ ) {
		$found = 1;
		if ( $addr ne "" ) {
			$labels{$addr} = "main";
		}
		else {
			# Labelling the empty string would rewrite every instruction
			# that has no operand (its operand text is empty too).
			print STDERR "\nWarning: no immediate push seen before the first call; main not labelled\n";
		}
		last;
	}
}
if ( $found == 0 ) { die("main function not found."); }



# ---- Step 6: open the result file and position the listing -------------
# Rewinds the listing, creates the output file with its "BEGIN" header,
# skips to the .text disassembly and initialises the progress indicator
# state used by the output loop.
seek(INPUT,0,0);
&printflush(STDOUT, "\nWriting references ...\n");
# "die print ..." printed the text and then died with the message "1";
# pass the text (and the OS error) to die directly.
open(OUTPUT, ">".$f_output) || die "\nError opening write file: $!\n";
print OUTPUT "BEGIN\n\n";

until (defined($_) && /Disassembly of section \.text:/){
 $_=<INPUT>;
 # End of file without the header used to loop forever.
 die "\nDisassembly of section .text not found\n" if (!defined($_));
}
$char=".";
$counter=0;
# Main output pass: one iteration per remaining line of the listing.  The
# loop body continues after the helper subs defined further down and is
# closed there.
while(<INPUT>){
 chomp;
 $counter++;
 # Progress indicator: one $char every 400 lines; every 4000 lines return
 # the cursor to the start of the line and toggle $char between "." and " ".
 if ( ($counter % 400)==0){
   printflush(STDOUT,$char);
   if ( ($counter % 4000)==0){
     printflush(STDOUT,"\r");
     if ($char eq "."){ $char=" ";}
     else { $char=".";}
   }  
 }
 # NOTE(review): $copia keeps the raw line but is not read in the visible code.
 $copia=$_;
 # Normalise the line: drop "0x" plus leading zeros in front of decimal
 # digits, remove <...> annotations, collapse pairs of spaces.
 $_=~s/0x0*([0-9]+)/$1/g;
 $_=~s/<(.*?)>//ge;
 $_=~s/  / /g;
 # Fields: address, mnemonic, everything else (operands).
 ($direccion, $inst, $destino)=split(/ /,$_,3);
 
 # Announce the start of a function when this address is a known symbol.
 if ( defined( $symbol_table{$direccion} )){
   print OUTPUT "\n";
   print OUTPUT "---- Function : ".$symbol_table{$direccion}." ----\n";
 }
 # Name the callee when the call target is a known symbol.
 if (/call/){
   $destino=~s/ //g;
   chomp($destino);
   if ( defined( $symbol_table{$destino} )){
     print OUTPUT "\n";
     print OUTPUT "Reference to function :
     ".$symbol_table{$destino}."\n\n";
   }
 }  
 #if ( defined( $salto{$direccion} )){
   #print OUTPUT "\n";
   #print OUTPUT "Referenced from jump at ".$salto{$direccion}."\n\n";
   #print OUTPUT $labels{$direccion}.":\n";
 #}
 #if ( defined( $call{$direccion} )){
   #print OUTPUT "\n";
   #print OUTPUT "Referenced from call at ".$call{$direccion}."\n\n";
   #print OUTPUT $labels{$direccion}.":\n";
 #}
 # Emit a label record when this address is a recorded jump/call target.
 if ( defined( $labels{$direccion} )) {
   print OUTPUT "label\n" . $labels{$direccion}."\n\n";
 }
 # Immediate operand: if its value falls inside the .rodata address range,
 # print the NUL-terminated string stored there (newlines removed).
 if (/\$/){
        ($instruccion, $operand)=split(/\$/,$_,2);
        # For anything but push, keep only the text before the first comma.
        if (!/push/){
          ($operand, $rest)=split(/\,/,$operand,2);
        }
        chomp($operand);
        # Reuses the global $offset that earlier held the .rodata file offset.
        $offset=hex($operand);
        # NOTE(review): "<=" also accepts the address one past the section.
        if ( ($offset <= $end_address) && ($offset >= $starting_address )
        ){
          $auxiliar=substr($cadena, $offset-$starting_address);
          # pack("x") is a single NUL byte.
          # NOTE(review): index returns -1 when no NUL follows; the substr
          # below then drops the last character instead.
          $length=index($auxiliar, pack("x") );
          $auxiliar=substr($auxiliar, 0, $length);       
          $auxiliar=~s/\n//g;
          #print OUTPUT "\n";
          print OUTPUT "String";
          print OUTPUT "\n\"$auxiliar\"\n\n"
        }       
  }
  # Build the instruction text, replacing the operand with its label name
  # when the operand (spaces removed) is a recorded target.
  $destino=~s/ //g;
  $o = "";
  if (exists $labels{$destino}) {
    $o = "        $inst ".$labels{$destino}."\n";
  } else {
    $o = "        $inst $destino\n";
  }

# The format of an asm instruction understood by uncc is
# opcode
# operand1
# (multiplier)
# (offset)
# operand2
# (multiplier)
# (offset)
# the multiplier and offset are 1 and 0 respectively if not present in the
# original opcode


# Classify one operand and return a one-letter kind code.
#
# The operand is expected in the form the output loop produces: AT&T
# syntax with the commas inside parentheses rewritten to "|" and without
# "0x" prefixes.
#
# Returns:
#   "m"  memory reference, any of disp(%b|%i|s), disp(%b|%i), disp(%i|s),
#        disp(%b), (%b|%i|s), (%b|%i), (%i|s), (%b)
#   "r"  register (%eax)
#   "i"  immediate ($12ad3789)
#   "d"  bare hex number (1ad23789) or alphanumeric label name
#   "p"  indirect through a register (*%eax)
#   "u"  anything else
#
# The argument is kept in a lexical, so the caller's $_ is left untouched.
sub elabora_prefisso
{
	my ($operand) = @_;
	$operand = "" unless defined $operand;

	# 500(%eax|%eax|2)
	return "m" if $operand =~ /^([0-9a-f]+)\(%([^|]+)\|%([^|]+)\|([0-9a-f]+)\)$/;
	# 500(%eax|%eax)
	return "m" if $operand =~ /^([0-9a-f]+)\(%([^|]+)\|%([^|]+)\)$/;
	# 500(%eax|2)
	return "m" if $operand =~ /^([0-9a-f]+)\(%([^|]+)\|([0-9a-f]+)\)$/;
	# 500(%eax)
	return "m" if $operand =~ /^([0-9a-f]+)\(%([^|]+)\)$/;
	# (%eax|%eax|2)
	return "m" if $operand =~ /^\(%([^|]+)\|%([^|]+)\|([0-9a-f]+)\)$/;
	# (%eax|%eax) - this pattern used to be a copy of the "500(%eax|%eax)"
	# one, so base+index operands without a displacement came back as "u".
	return "m" if $operand =~ /^\(%([^|]+)\|%([^|]+)\)$/;
	# (%eax|2)
	return "m" if $operand =~ /^\(%([^|]+)\|([0-9a-f]+)\)$/;
	# (%eax)
	return "m" if $operand =~ /^\(%([^|]+)\)$/;
	# %eax
	return "r" if $operand =~ /^%(.*)$/;
	# $12ad3789
	return "i" if $operand =~ /^\$([0-9a-f]+)$/;
	# 1ad23789
	return "d" if $operand =~ /^([0-9a-f]+)$/;
	# *%eax
	return "p" if $operand =~ /^\*%(.*)$/;
	# ...Label...
	return "d" if $operand =~ /^([a-zA-Z][a-zA-Z0-9]*)$/;

	return "u";
}
# Write one operand to the OUTPUT filehandle, one field per line.
#
# The operand is expected in the same form elabora_prefisso classifies
# (commas inside parentheses rewritten to "|", no "0x" prefixes).
#
# Memory references produce four lines: displacement, first register,
# second register, multiplier.  Missing parts are written as 0, except a
# missing multiplier, which is written as 1; the forms with a single
# register and a multiplier put that register on the third line.
# Registers, immediates, bare hex numbers, *%reg and label names produce
# one line (without the %, $ or * prefix).  Anything else produces "UNK".
#
# The argument is kept in a lexical, so the caller's $_ is left untouched.
sub elabora_parametro
{
	my ($operand) = @_;
	$operand = "" unless defined $operand;
	my @fields;

	# 500(%ebx|%eax|2)
	if    ($operand =~ /^([0-9a-f]+)\(%([^|]+)\|%([^|]+)\|([0-9a-f]+)\)$/) { @fields = ($1, $2, $3, $4); }
	# 500(%eax|%eax)
	elsif ($operand =~ /^([0-9a-f]+)\(%([^|]+)\|%([^|]+)\)$/) { @fields = ($1, $2, $3, 1); }
	# 500(%eax|2)
	elsif ($operand =~ /^([0-9a-f]+)\(%([^|]+)\|([0-9a-f]+)\)$/) { @fields = ($1, 0, $2, $3); }
	# 500(%eax)
	elsif ($operand =~ /^([0-9a-f]+)\(%([^|]+)\)$/) { @fields = ($1, $2, 0, 1); }
	# (%eax|%eax|2)
	elsif ($operand =~ /^\(%([^|]+)\|%([^|]+)\|([0-9a-f]+)\)$/) { @fields = (0, $1, $2, $3); }
	# (%eax|%eax)
	elsif ($operand =~ /^\(%([^|]+)\|%([^|]+)\)$/) { @fields = (0, $1, $2, 1); }
	# (%eax|2)
	elsif ($operand =~ /^\(%([^|]+)\|([0-9a-f]+)\)$/) { @fields = (0, 0, $1, $2); }
	# (%eax)
	elsif ($operand =~ /^\(%([^|]+)\)$/) { @fields = (0, $1, 0, 1); }
	# %eax
	elsif ($operand =~ /^%(.*)$/) { @fields = ($1); }
	# $12ad3789
	elsif ($operand =~ /^\$([0-9a-f]+)$/) { @fields = ($1); }
	# 1ad23789
	elsif ($operand =~ /^([0-9a-f]+)$/) { @fields = ($1); }
	# *%eax
	elsif ($operand =~ /^\*%(.*)$/) { @fields = ($1); }
	# ...Label...
	elsif ($operand =~ /^([a-zA-Z][a-zA-Z0-9]*)$/) { @fields = ($1); }

	if (@fields) {
		print OUTPUT join("\n", @fields), "\n";
		return;
	}

	print OUTPUT "UNK\n";
}

# Return the suffix to append to a mnemonic: "l" or the empty string.
#
#   - anything starting with "j" (jump, je, ja, ...) gets ""
#   - imul, lsl, mul, rcl, rol, sal, shl, setl, setnl, sbb, setb, setnb and
#     sub get "l": they end in "l" or "b" as part of their own name
#   - any other mnemonic ending in "l", "w" or "b" gets "" (presumably it
#     already carries a size suffix)
#   - everything else gets "l"
#
# The argument is kept in a lexical, so the caller's $_ is left untouched.
sub elabora_opcode
{
	my ($mnemonic) = @_;
	$mnemonic = "" unless defined $mnemonic;

	# jump je ja ... etc.
	return "" if $mnemonic =~ /^j/;

	# Mnemonics whose final "l" or "b" belongs to the name itself.
	return "l" if $mnemonic =~ /^(?:imul|lsl|mul|rcl|rol|sal|shl|setl|setnl|sbb|setb|setnb|sub)$/;

	# Already ends in l, w or b: append nothing.
	return "" if $mnemonic =~ /[lwb]$/;

	# default
	return "l";
}

 # Second half of the output loop body: $o holds the instruction text built
 # before the helper subs.  Strip newlines and surrounding spaces first.
 $o =~ s/\n//g;
 $o =~ s/^ +//g;
 $o =~ s/ +$//g;
 # Temporary patch to strip the 0x from numeric constants
 $o =~ s/0x//g;

 # Skip texts starting with "of section" (presumably what is left of the
 # "Disassembly of section ..." header lines after the field split).
 $_ = $o;
 next if (/^of section/);

 # Turn a comma inside parentheses into "|" so that the remaining comma
 # separates the two operands; applied twice to cover two inner commas.
 $o =~ s/(\(.*),(.*\))/$1|$2/g;
 $o =~ s/(\(.*),(.*\))/$1|$2/g;

 # Case 1: opcode with two comma-separated operands.
 $_ = $o;
 $res = /^([^ ]+) ([^ ]+),([^ ]+)/;
 if ($res) {

 	$opcode = $1;
	$param1 = $2;
	$param2 = $3;
	# Record name: opcode + suffix + "-" + kind of operand 2 + kind of operand 1.
	$opcode .= elabora_opcode($opcode) . "-" . elabora_prefisso($param2) . elabora_prefisso($param1);
	# Trace line on standard output.
 	print ">$o< => OPCODE 3 => >$opcode<\t>$param1<\t>$param2<\n";
	print OUTPUT "$opcode\n";
	# The operands are written in reverse order: second first.
	elabora_parametro($param2);
	elabora_parametro($param1);
	print OUTPUT "\n";

 } else {

 	# Case 2: opcode with a single operand.
 	$_ = $o;
 	$res = /^([^ ]+) ([^ ]+)/;
	if ($res) {

		$opcode = $1;
		$param1 = $2;
		$opcode .= elabora_opcode($opcode) . "-" . elabora_prefisso($param1);
 		print ">$o< => OPCODE 2 => >$opcode<\t>$param1<\n";
		print OUTPUT "$opcode\n";
		elabora_parametro($param1);
		print OUTPUT "\n";

	} else {

		# Case 3: bare opcode; anything else is written as "UNK".
		$_ = $o;
		$res = /^([^ ]+)$/;
		if ($res) {
			$opcode = $1;
			print ">$o< => OPCODE 1 => >$opcode<\n";
			print OUTPUT "$opcode\n\n";
		} else {
			print ">$o< => UNKN\n\n";
			print OUTPUT "UNK\n\n";
		}

	}
	
 }


# End of the while(<INPUT>) output loop.
}
# ---- Final step: write the trailer, close the files and remove the
# scratch objdump listing ("<dasm_file>2").
print OUTPUT "END\n\nGenerated by a modified version of Dasm ;)\n\n";
close(INPUT);
# Buffered write errors only show up when the handle is closed.
my $output_closed=close(OUTPUT);
my $close_error=$!;
print "\n";
# unlink removes the file directly; "rm" through the shell would
# misinterpret names containing spaces or shell metacharacters.
unlink($f_output."2");
die "\nError closing write file: $close_error\n" if (!$output_closed);
