#!/usr/local/bin/env python

#=============================================================================================
# MODULE DOCSTRING
#=============================================================================================

"""
Extract distance trajectories from WCA dimer trajectories in various conditions.

DESCRIPTION

COPYRIGHT

@author John D. Chodera <jchodera@gmail.com>

All code in this repository is released under the GNU General Public License.

This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.

TODO

"""

#=============================================================================================
# GLOBAL IMPORTS
#=============================================================================================

import os
import os.path
import sys
import math
import copy
import time

import numpy

import simtk.unit as units
    
#import Scientific.IO.NetCDF as netcdf # for netcdf interface in Scientific
import netCDF4 as netcdf # for netcdf interface provided by netCDF4 in enthought python

#=============================================================================================
# SUBROUTINES
#=============================================================================================

#=============================================================================================
# MAIN AND TESTS
#=============================================================================================

if __name__ == "__main__":

    datadir = 'data' # directory in which data is stored
    processed_datadir = 'processed-data' # directory in which processed data is placed

    #
    # Analyze trajectories.
    #

    prefixes = ['md-vacuum', 'mc-vacuum', 'md-solvent', 'mc-solvent', 'ncmc-solvent']

    for prefix in prefixes:
        print prefix
        
        # Form filename.
        filename = os.path.join(datadir, prefix + '.nc')

        # Open NetCDF file for reading
        # ncfile = netcdf.NetCDFFile(filename, 'r') # for Scientific.IO.NetCDF
        ncfile = netcdf.Dataset(filename, 'r') # for netCDF4
        
        niterations = ncfile.variables['distance'][:].shape[0]
        
        # Write distances to Matlab-readable file.
        output_filename = os.path.join(processed_datadir, prefix + '.txt')
        outfile = open(output_filename, 'w')
        for iteration in range(niterations):
            distance = ncfile.variables['distance'][iteration]

            # Check for NaNs
            if numpy.isnan(distance):
                print "file '%s' iteration %d shows NaN distance!" % (filename, iteration)
                break
            
            outfile.write('%12.3f\n' % distance)
        outfile.close()

        # Compute mean GHMC acceptance.
        if 'fraction_accepted' in ncfile.variables.keys():
            fraction_accepted = ncfile.variables['fraction_accepted'][:].mean()
            dfraction_accepted = ncfile.variables['fraction_accepted'][:].std() / numpy.sqrt(float(niterations))
            print "GHMC acceptance rate %.3f +- %.3f %%" % (fraction_accepted*100.0, dfraction_accepted*100.0)
            
        # Close netcdf file.
        ncfile.close()

    #
    # Analyze umbrella sampling simulation.
    #

    prefix = 'umbrella-solvent'

    # Form filename.
    filename = os.path.join(datadir, prefix + '.nc')

    # Open NetCDF file for reading
    # ncfile = netcdf.NetCDFFile(filename, 'r') # for Scientific.IO.NetCDF
    ncfile = netcdf.Dataset(filename, 'r') # for netCDF4
    
    niterations = ncfile.variables['distance'][:].shape[0]

    # Write distances to file.
    output_filename = os.path.join(processed_datadir, prefix + '.txt')    
    outfile = open(output_filename, 'w')
    for iteration in range(niterations):
        distance = ncfile.variables['distance'][iteration]
        du = ncfile.variables['original_potential'][iteration] - ncfile.variables['potential'][iteration] 
        outfile.write('%12.3f %16.12e\n' % (distance, du))
    outfile.close()

    # Output summary statistics.
    du = ncfile.variables['original_potential'][:] - ncfile.variables['potential'][:]     
    print "std dev of work = %.3f kT" % du.std()

    # Close netcdf file.
    ncfile.close()

