# NOTE: THIS IS A SLIGHTLY MODIFIED VERSION OF DISTANCE.PL 
# THE MINIMUM DISTANCE BETWEEN ALL PAIRS OF RESIDUES IS PRINTED TO A SEPARATE FILE IN THIS FORMAT: 45 67 3.14 (WHERE 45 AND 67 ARE RESIDUE NUMBERS)

#!/usr/local/bin/perl -w 


BEGIN 
{
	push(@INC, "/Perl/lib");
	push(@INC, "/Perl/site/lib");
}

use lib "include";
use Bio::Structure::IO;
use Math::Cephes qw(:all);
use strict;

calcDist("null");


# PURPOSE: RETURN 1 IF THE STRING PASSED IS ONE OF THE 20 AMINO ACIDS; RETURN 0 OTHERWISE

sub checkHet
{
	my $res = shift;
	if($res eq "ALA" | $res eq "ARG" | $res eq "ASN" | $res eq "ASP" | $res eq "CYS" | $res eq "GLU" | $res eq "GLN" | $res eq "GLY" | $res eq "HIS" | $res eq "ILE" | $res eq "LEU" | $res eq "LYS" | $res eq "MET" | $res eq "PHE" | $res eq "PRO" | $res eq "SER" | $res eq "THR" | $res eq "TRP" | $res eq "TYR" | $res eq "VAL" )
	{return 1;}
	else
	# het
	{return 0;}
}
		
		
		
# PURPOSE: FOR A GIVEN PAIR OF RESIDUES, COMPUTES AND RETURNS THE MINIMUM DISTANCE 
#          BETWEEN ANY ATOM IN RESIDUE 1 WITH ANY ATOM IN RESIDUE 2
																		
sub minDist
{
	my($resCoordRef, $res1, $res2);
	my($i, $j, $numAtoms1, $numAtoms2);
	my($distance, $minDistSoFar);
	
	#get passed parameters
	$resCoordRef = shift;
	$res1 = shift;
	$res2 = shift;
	
	$minDistSoFar = 100000000000;
	$numAtoms1 = scalar @{$resCoordRef->{$res1}};
	$numAtoms2 = scalar @{$resCoordRef->{$res2}};
	
	#loop over all atoms for res 1 
	for($i = 0; $i < $numAtoms1; $i++ )
	{
		#loop over all atoms for res 2
		for($j= 0; $j < $numAtoms2; $j++ )
		{
			$distance = sqrt( powi($resCoordRef->{$res1}[$i]->[0] - $resCoordRef->{$res2}[$j]->[0], 2) + powi($resCoordRef->{$res1}[$i]->[1] - $resCoordRef->{$res2}[$j]->[1], 2) + powi($resCoordRef->{$res1}[$i]->[2] - $resCoordRef->{$res2}[$j]->[2], 2 ));
			#update minimum distance if necessary
			if($distance < $minDistSoFar)
			{
				$minDistSoFar = $distance;
			}
		}
	}
	return $minDistSoFar;
}


# PURPOSE: COMPUTE THE MINIMUM DISTANCE BETWEEN EACH PAIR OF RESIDUES BY LOOKING AT ALL ATOMS AND
#		   WRITE THE PAIRWISE DISTANCES IN MATRIX FORMAT TO FILE 'distances.txt', WHERE THE DIAGONAL WILL BE 0'S.
#          WHILE WE'RE AT IT, WRITE THE DEFINED RESIDUES(3LETTER AA + RESIDUE NO.) 
#	       IN 'struct.pdb' TO FILE 'pdbseq.txt'

sub calcDist
{	
	my ($stream, $structure, $chainName, $chain, @chains, @residues);
	my ($res, $resId, $resNum, $resType);
	my ($atom, @atoms, %resCoord, @atomCoord);
	my ($idx,$i, $j);
	my $dist;
	my(@resList, $inner, $outer);
		
	#get passed parameter
	$chainName = shift;
	
	#open file 'pdbseq.txt' for writing defined residue names + numbers in PDB file
	#each residue (eg, 742[tab]THR) occupies a line in this file
	open(SEQ, ">data/pdbseq.txt") || die("Cannot open file for outputing residue numbers in PDB structure");
		
	#extract residue numbers from PDB file
	$stream = Bio::Structure::IO->new(-file => "struct.pdb", -format => 'PDB');
	$structure = $stream->next_structure();
	
	#here, pass chain ID as argument if you want residues pertaining to a specific chain
	# alternatively, just pass in the first or default chain if structure is single chain 
	
	@chains = $structure->get_chains();
	if($chainName eq "null")
	{
		
		@residues = $structure->get_residues($chains[0]);
	}
	else
	{
		foreach $chain (@chains)
		{
			if($chain->id() eq $chainName)
			{
				print $chainName;
				@residues = $structure->get_residues($chain); 
				last;
			}
			
		}
	}
	

	foreach $res (@residues)
	{
		#format for $resId is 3-lettercode-dash-residue number (eg PHE-20)
		$resId = $res->id;
		($resType, $resNum) = split(/-/,$resId);
		if($resType eq "HOH")
		{
			#don't need to record waters-- break out of loop
			last;
		}
		else
		{	
			push(@resList, $resNum);
			if(checkHet($resType) == 1)
			{
				print SEQ "$resNum\t$resType\n";
			}
			else
			{
				print SEQ "$resNum\tHET\n";
			}
			#extract x,y,z coordinates of all atoms for each residue
			@atoms = $structure->get_atoms($res);
			$idx =0;
			foreach $atom(@atoms)
			{
				@atomCoord = $atom->xyz();
				push(@{$resCoord{$resNum}[$idx]}, @atomCoord);
				$idx++;
			}
		}
	}
	close SEQ;
      
	
	open(LIST, ">data/distanceList.txt") || die("Cannot open file for outputting min distance list");

	open(MATRIX, ">data/distances.txt") || die("Cannot open file for outputting min distance matrix");

	for($outer = 0; $outer <= $#resList; $outer++)
	{
		for($inner = 0; $inner <= $#resList; $inner++)
		{
			#compute pairwise distance and keep accuracy to 2 decimal places
			$dist = sprintf( "%.2f", minDist(\%resCoord, $resList[$outer], $resList[$inner]) );
			print MATRIX "$dist ";
			print LIST "$resList[$outer] $resList[$inner] $dist\n";
		}
		print MATRIX "\n"
	}
	close MATRIX;
	close LIST;
	
}

