#!/usr/bin/python

#=============================================================================================
# Prepare FKBP complexes from Michael Shirts in AMBER format for GB/SA simulation with YANK.
#
# John D. Chodera <jchodera@gmail.com>
# QB3-Berkeley
# 11 Sep 2009 
#=============================================================================================

#=============================================================================================
# COPYRIGHT NOTICE
#
# Written by John D. Chodera <jchodera@gmail.com>.
#
# Copyright (c) 2009 The Regents of the University of California.  All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along with this program;
# if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA  02110-1301, USA.
#=============================================================================================

#=============================================================================================
# IMPORTS
#=============================================================================================

import commands
import os
import os.path
import shutil

from mmtools.moltools.ligandtools import *
from mmtools.gromacstools.MdpFile import *

#=============================================================================================
# PARAMETERS
#=============================================================================================

editconf = 'editconf_d' # name of gromacs 'editconf' executable

#=============================================================================================
# SUBROUTINES
#=============================================================================================

def write_file(filename, contents):
    """Write the specified contents to a file.

    ARGUMENTS
      filename (string) - the file to be written
      contents (string) - the contents of the file to be written

    """

    outfile = open(filename, 'w')

    if type(contents) == list:
        for line in contents:
            outfile.write(line + '\n')
    elif type(contents) == str:
        outfile.write(contents)
    else:
        raise "Type for 'contents' not supported: " + repr(type(contents))

    outfile.close()

    return
#=============================================================================================
def read_file(filename):
    """Read contents of the specified file.

    ARGUMENTS
      filename (string) - the name of the file to be read

    RETURNS
      lines (list of strings) - the contents of the file, split by line
    """

    infile = open(filename, 'r')
    lines = infile.readlines()    
    infile.close()

    for i in range(len(lines)):
        lines[i] = lines[i].strip()    

    return lines
#=============================================================================================
def rename_pdb_for_amber(protein_pdb_filename, protein_amberpdb_filename, ligand_name):
    """Rename residues in a PDB file to match AMBER naming convention, omitting atoms AMBER will not recognize.
    We also eliminate waters and ligand atoms.

    ARGUMENTS
      protein_pdb_filename (string) - name of source PDB file to be read
      protein_amberpdb_filename (string) - name of PDB file to be written with AMBER naming conventions

    """
    
    # read the PDB file
    lines = read_file(protein_pdb_filename)

    # allocate storage for target file contents
    outlines = list()

    # parse PDB file, creating new contents
    for line in lines:
        # strip newline
        line = line.rstrip()
        
        # only ATOM records will be retained
        if line[0:6] == "ATOM  ":
            # Parse line into fields.
            serial = line[6:11]
            name = line[12:16]
            altLoc = line[16:17]
            resname = line[17:21]
            chainid = line[21:22]
            resseq = line[22:26]
            idcode = line[26:27]
            remainder = line[27:]

            # drop the line it is a hydrogen
            if name[1] == 'H': continue
            # phosphorylated residues have a strange naming convention on their hydrogens
            if name[0] == 'H': continue

            # make residue name substitutions
            if resname == 'GYN ': resname = 'GLY '
            if resname == 'GUC ':
                resname = 'GLU '
                # rename terminal oxygens
                if name == ' O  ': name = ' OXT'
                if name == ' O2 ': name = ' O  '
                
            if (resname == 'ILE '):
                if name == ' CD ': name = ' CD1'
                
#            if (resname == 'CLY '):
#                resname = 'LYS '
#                # rename terminal oxygens
#                if name == ' OC1': name = ' OXT'
#                if name == ' OC2': name = ' O  '
            
            # drop ligand
            if resname.strip() == ligand_name.strip(): continue

            # renumber serial
            serial = '% 5d' % (len(outlines) + 1)
            
            # reconstitute line
            outline = 'ATOM  ' + serial + ' ' + name + altLoc + resname + chainid + resseq + idcode + remainder
            outlines.append(outline)

    write_file(protein_amberpdb_filename, outlines)
            
    return
#=============================================================================================
def extract_ligand_pdb(protein_pdb_filename, ligand_name, ligand_pdb_filename):
    """Extract ligand PDB from complex PDB file.

    ARGUMENTS
      protein_pdb_filename (string) - name of source PDB file to be read
      ligand_name (string) - ligand residue name in PDB file
      ligand_pdb_filename (string) - ligand PDB filename to write
      
    """
    
    # read the PDB file
    lines = read_file(protein_pdb_filename)

    # allocate storage for target file contents
    outlines = list()

    # parse PDB file, creating new contents
    for line in lines:
        # strip newline
        line = line.rstrip()
        
        # only ATOM records will be retained
        if line[0:6] == "ATOM  ":
            # Parse line into fields.
            serial = line[6:11]
            name = line[12:16]
            altLoc = line[16:17]
            resname = line[17:21]
            chainid = line[21:22]
            resseq = line[22:26]
            idcode = line[26:27]
            remainder = line[27:]

            # save ligand
            if resname.strip() == ligand_name.strip():
                # rename atom
                if name in [' c  ', ' ca ', ' c3 ', ' c2 ']:
                    name = ' C  '
                if name in [' ha ', ' hc ', ' h1 ', ' ho ']:
                    name = ' H  '
                if name in [' o  ', ' os ', ' oh ']:
                    name = ' O  '
                if name in [' n  ']:
                    name = ' N  '

                # renumber serial
                serial = '% 5d' % (len(outlines) + 1)
                # reconstitute line
                outline = 'ATOM  ' + serial + ' ' + name + altLoc + resname + chainid + resseq + idcode + remainder
                outlines.append(outline)

    # write ligand file
    write_file(ligand_pdb_filename, outlines)
            
    return
#=============================================================================================
def parameterize_ligand(ligand_filename, ligand_name, work_path):
    """Parameterize ligand.

    ARGUMENTS
      ligand_filename (string) - name of mol2 file describing ligand
      work_path (string) - name of directory to place files in
      
    """

    # get current directory
    current_path = os.getcwd()

    # create the work directory if it doesn't exist
    if not os.path.exists(work_path):
        os.makedirs(work_path)

    # SET UP LIGAND TOPOLOGY

    print "\nCONSTRUCTING LIGAND TOPOLOGY"

    # Read the ligand into OEMol.
    print "Reading ligand..."
    ligand = readMolecule(ligand_filename, normalize = True)

    # Set name
    print "ligand_name = %s" % ligand_name
    ligand.SetTitle(ligand_name)
    
    # Get formal charge of ligand.
    ligand_charge = formalCharge(ligand)
    print "formal charge is %d" % ligand_charge

    # Change to working directory.
    os.chdir(work_path)
    
    # Write molecule with explicit hydrogens to mol2 file.
    print "Writing ligand mol2 file..."
    ligand_mol2_filename = '%(ligand_name)s.openeye.mol2' % vars()
    writeMolecule(ligand, ligand_mol2_filename)

    # Set substructure name (which will become residue name).
    print "Modifying molecule name..."
    modifySubstructureName(ligand_mol2_filename, 'MOL')

    # Run antechamber to assign GAFF atom types.
    print "Running antechamber..."
    gaff_mol2_filename = '%(ligand_name)s.gaff.mol2' % vars()
    charge_model = 'bcc'
    command = 'antechamber -i %(ligand_mol2_filename)s -fi mol2 -o %(gaff_mol2_filename)s -fo mol2 -c %(charge_model)s -nc %(ligand_charge)d >& antechamber.out' % vars()
    print command
    output = commands.getoutput(command)    
    
    # skip if antechamber didn't work
    if not os.path.exists(gaff_mol2_filename):
        # Restore old directory
        os.chdir(current_path)        
        return

    # Generate frcmod file for additional GAFF parameters.
    ligand_frcmod_filename = '%(ligand_name)s.frcmod' % vars()
    output = commands.getoutput('parmchk -i %(gaff_mol2_filename)s -f mol2 -o %(ligand_frcmod_filename)s' % vars())
    print output

    # skip if parmchk didn't work
    if not os.path.exists(ligand_frcmod_filename):
        # Restore old directory
        os.chdir(current_path)        
        return

    # Run LEaP to generate topology / coordinates.
    ligand_prmtop_filename = '%(ligand_name)s.prmtop' % vars()
    ligand_crd_filename = '%(ligand_name)s.crd' % vars()
    ligand_off_filename = '%(ligand_name)s.off' % vars()
    
    tleap_input_filename = 'setup-ligand.leap.in'
    tleap_output_filename = 'setup-ligand.leap.out'
    contents = """
# Load GAFF parameters.
source leaprc.gaff

# load antechamber-generated additional parameters
mods = loadAmberParams %(ligand_frcmod_filename)s

# load ligand
ligand = loadMol2 %(gaff_mol2_filename)s

# check the ligand
check ligand

# report net charge
charge ligand

# save AMBER parameters
saveAmberParm ligand %(ligand_prmtop_filename)s %(ligand_crd_filename)s

# write .off file
saveOff ligand %(ligand_off_filename)s

# exit
quit
""" % vars()
    write_file(tleap_input_filename, contents)
    command = 'tleap -f %(tleap_input_filename)s > %(tleap_output_filename)s' % vars()
    output = commands.getoutput(command)
    
    # extract total charge
    ligand_charge = commands.getoutput('grep "Total unperturbed charge" %(tleap_output_filename)s | cut -c 27-' % vars())
    ligand_charge = int(round(float(ligand_charge))) # round to nearest whole charge
    print "ligand charge is %d" % ligand_charge    

    # Restore old directory
    os.chdir(current_path)

    return

#=========================================================================================
# MAIN
#=============================================================================================

# Make a list of all ligands
ligand_names = commands.getoutput("ls mol2 | sed -e 's/.mol2//'").split()
print ligand_names

for ligand_name in ligand_names:
    print ligand_name

    # Create destination directory.
    dest_directory = 'amber-gbsa/%(ligand_name)s' % vars()
    os.makedirs(dest_directory)

    # Assemble source complex PDB filename.
    complex_pdb_filename = 'fkbp-complex/%(ligand_name)s/pdbs/%(ligand_name)s.complex.0.pdb' % vars()

    # Extract ligand from complex and generate mol2.
    ligand_pdb_filename = os.path.join(dest_directory, '%(ligand_name)s.pdb' % vars())
    extract_ligand_pdb(complex_pdb_filename, ligand_name, ligand_pdb_filename)
    molecule = readMolecule(ligand_pdb_filename)
    source_mol2_filename = os.path.join(dest_directory, '%(ligand_name)s.mol2' % vars())
    writeMolecule(molecule, source_mol2_filename)
    
    # Parameterize ligand with antechamber.
    parameterize_ligand(source_mol2_filename, ligand_name, dest_directory)
    
    # Extract protein heavy atoms only, and rename to AMBER conventions.
    protein_ambernames_pdbfilename = '%(dest_directory)s/receptor.pdb' % vars()
    rename_pdb_for_amber(complex_pdb_filename, protein_ambernames_pdbfilename, ligand_name)
    
    # Set up complex in LEaP.
    contents = read_file('setup.leap.in')
    for i in range(len(contents)):  contents[i] = contents[i] % vars() # replace variables
    write_file('%(dest_directory)s/setup.leap.in' % vars(), contents)
    command = 'pushd . ; cd %(dest_directory)s ; tleap -f setup.leap.in ; popd' % vars()
    output = commands.getoutput(command)
    write_file('%(dest_directory)s/setup.leap.out' % vars(), output)

    # Generate PDB files.
    command = 'cat %(dest_directory)s/ligand.crd | ambpdb -p %(dest_directory)s/ligand.prmtop > %(dest_directory)s/ligand.pdb' % vars()
    print commands.getoutput(command)
    command = 'cat %(dest_directory)s/receptor.crd | ambpdb -p %(dest_directory)s/receptor.prmtop > %(dest_directory)s/receptor.pdb' % vars()
    print commands.getoutput(command)
    command = 'cat %(dest_directory)s/complex.crd | ambpdb -p %(dest_directory)s/complex.prmtop > %(dest_directory)s/complex.pdb' % vars()
    print commands.getoutput(command)

    # Minimize (this may take a while)
    print "Minimizing..."
    command = 'cp min.sander.in %(dest_directory)s/min.sander.in' % vars()
    print commands.getoutput(command)
    command = 'pushd . ; cd %(dest_directory)s; sander -O -i min.sander.in -o ligand-min.sander.out -p ligand.prmtop -c ligand.crd -r ligand-minimized.crd ; popd' % vars()
    print commands.getoutput(command)
    command = 'pushd . ; cd %(dest_directory)s; sander -O -i min.sander.in -o receptor-min.sander.out -p receptor.prmtop -c receptor.crd -r receptor-minimized.crd ; popd' % vars()
    print commands.getoutput(command)
    command = 'pushd . ; cd %(dest_directory)s; sander -O -i min.sander.in -o complex-min.sander.out -p complex.prmtop -c complex.crd -r complex-minimized.crd ; popd' % vars()
    print commands.getoutput(command)

    # Evaluate single-point energy
    print "Evaluating energy..."
    command = 'cp testenergy.sander.in %(dest_directory)s/testenergy.sander.in' % vars()
    print commands.getoutput(command)
    command = 'pushd . ; cd %(dest_directory)s; sander -O -i testenergy.sander.in -o ligand-testenergy.sander.out -p ligand.prmtop -c ligand-minimized.crd -r ligand-testenergy.crd ; popd' % vars()
    print commands.getoutput(command)
    command = 'pushd . ; cd %(dest_directory)s; sander -O -i testenergy.sander.in -o receptor-testenergy.sander.out -p receptor.prmtop -c receptor-minimized.crd -r receptor-testenergy.crd ; popd' % vars()
    print commands.getoutput(command)
    command = 'pushd . ; cd %(dest_directory)s; sander -O -i testenergy.sander.in -o complex-testenergy.sander.out -p complex.prmtop -c complex-minimized.crd -r complex-testenergy.crd ; popd' % vars()
    print commands.getoutput(command)
    
    # Generate PDB file of minimized complex
    command = 'cat %(dest_directory)s/complex.crd | ambpdb -p %(dest_directory)s/complex.prmtop > %(dest_directory)s/complex.pdb' % vars()
    print commands.getoutput(command)