#!/usr/bin/python

#=============================================================================================
# Test MBAR by performing statistical tests on a set of 1D harmonic oscillators, for which
# the true free energy differences can be computed analytically.
#
# A number of replications of an experiment in which i.i.d. samples are drawn from a set of
# K harmonic oscillators are produced.  For each replicate, we estimate the dimensionless free
# energy differences and mean-square displacements (an observable), as well as their uncertainties.
#
# For a 1D harmonic oscillator, the potential is given by
#   V(x;K) = (K/2) * (x-x_0)**2
# where K denotes the spring constant.
#
# The equilibrium distribution is given analytically by
#   p(x;beta,K) = sqrt[(beta K) / (2 pi)] exp[-beta K (x-x_0)**2 / 2]
# The dimensionless free energy is therefore
#   f(beta,K) = - (1/2) * ln[ (2 pi) / (beta K) ]
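#
# For example, with beta = 1 and K = 1, f = -(1/2) ln(2 pi) ~= -0.919, and the
# free energy difference between two oscillators is
#   Delta f = f_2 - f_1 = (1/2) ln(K_2/K_1),
# independent of the offsets x_0.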
#
#=============================================================================================

#=============================================================================================
# TODO
#=============================================================================================
# * Generate a plot after completion, similar to the plot from the WHAM paper.
#=============================================================================================

#=============================================================================================
# VERSION CONTROL INFORMATION
#=============================================================================================
__version__ = "$Revision: 218 $ $Date: 2010-07-06 10:34:44 -0400 (Tue, 06 Jul 2010) $"
# $Date: 2010-07-06 10:34:44 -0400 (Tue, 06 Jul 2010) $
# $Revision: 218 $
# $LastChangedBy: mrshirts $
# $HeadURL: https://simtk.org/svn/pymbar/trunk/examples/harmonic-oscillators/harmonic-oscillators.py $
# $Id: harmonic-oscillators.py 218 2010-07-06 14:34:44Z mrshirts $

#=============================================================================================
# IMPORTS
#=============================================================================================
import pymbar
import numpy
import confidenceintervals
import matplotlib.pyplot as plt
import pdb
import sys
import commands
import re
import timeseries
import scipy.stats

#=============================================================================================
# PARAMETERS
#=============================================================================================

generateplots = False
nreplicates = 40 # number of replicates (or bootstrap samples) of experiment for testing uncertainty estimate
verbose = True

# Seed the random number generator to produce reproducible output; comment out the following line for randomized runs.
numpy.random.seed(0)
calctype = 'harmonic'
#calctype = 'exponential'
#calctype = 'alchemical'
#mbar_method = 'self-consistent-iteration'
#mbar_method = 'Newton-Raphson'
mbar_method = 'adaptive'
dofdalchemical = True
if (calctype == 'alchemical'):
   dofdalchemical = True

##### Harmonic parameters #####
if (calctype == 'harmonic'):
   nall = 10
   dfrac  = 0.01 # fractional temperature difference used to take numerical derivatives (dt = dfrac/beta)

   # Linear in K and linear in O
   # K = Kscale*(0.1+0.9*lambda); lambda = 0, K = 0.1*Kscale; lambda = 1, K = Kscale
   # O = Oscale*(lambda) 

   Nstates = 11
   klow = 0.1
   K_limit = [klow,1.0] 
   O_limit = [0.0,1.0]
   dK = (K_limit[1]-K_limit[0])/(Nstates-1.0)
   dO = (O_limit[1]-O_limit[0])/(Nstates-1.0)
   
   Kscale = 1.0
   Oscale = 1.0
   K_k = Kscale*numpy.arange(K_limit[0], K_limit[1]+0.5*dK,dK) # spring constants for each state
   O_k = Oscale*numpy.arange(O_limit[0], O_limit[1]+0.5*dO,dO) # offsets (equilibrium positions) for each state
   N_k = nall*numpy.ones(Nstates,int) # number of samples from each state (can be zero for some states)
   beta = 0.5 # inverse temperature for all simulations
   lv = 0 # lv = 0 indicates evenly spaced lambda values
   Nnoise = 0  # number of additional "noise" harmonic oscillators whose energy
               # is included in the total but does not change with lambda.  If
               # there is noise, we assume the noise oscillators are all the
               # same, with average K = (Khigh+Klow)/2.  The noise energies are
               # then distributed according to the chi^2(Nnoise) distribution:
               # since the coordinates are normal with variance sigma, they are
               # "standardized" by dividing by sigma, and we then multiply by K/2.
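
   # With the defaults above (Nstates = 11, Kscale = Oscale = 1.0), this gives
   # K_k = [0.1, 0.19, 0.28, ..., 1.0] and O_k = [0.0, 0.1, ..., 1.0]:
   # eleven states evenly spaced in lambda.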

if (calctype == 'exponential'):
   nall = 200
   dfrac  = 0.1 # fraction of beta used to take numerical derivatives

   # Linear in lambda -- there is only one parameter:
   # L = Lscale*(0.1+0.9*lambda); lambda = 0, L = 0.1*Lscale; lambda = 1, L = Lscale

   Nstates = 11
   Llow = 0.1
   L_limit = [Llow,1.0] 
   dL = (L_limit[1]-L_limit[0])/(Nstates-1.0)
   
   Lscale = 10.0

   L_k = Lscale*numpy.arange(L_limit[0], L_limit[1]+0.5*dL,dL) # Lambda for each state
   N_k = nall*numpy.ones(Nstates,int) # number of samples from each state (can be zero for some states)
   beta = 1.0 # inverse temperature for all simulations
   lv = 0 # lv = 0 indicates evenly spaced lambda values
   Nnoise = 100  # number of additional "noise" degrees of freedom whose energy
                # is included in the total but does not change with lambda.  If
                # there is noise, the noise energies are distributed according
                # to the Erlang distribution with shape parameter Nnoise.

##### Alchemical parameters #####

if (calctype == 'alchemical'):
   nequil = 100              # number of initial samples assumed to be equilibration, and thus omitted.
   datafile_directory = "../../large-datasets/entropy-database"  # directory for data samples
   datafile_prefix  = 'BUAM_short_mgibbs'  # prefixes for datafile sets 
   #datafile_prefix  = 'UAMM_mgibbs'  # prefixes for datafile sets 
   convert_atmnm3_to_kJmol = 1.01325e5*(1e-09)**3 * 6.02214 * (1e23) / 1000 # Convert pV from atmospheres*nm^3 into kJ/mol
   kB = 1.381*6.02214/1000.0  # Boltzmann's constant (kJ/mol/K)
   temperature = 298.0 # temperature (K)
   pressure = 1.0 # pressure (atm)
   beta = 1./(kB*temperature) 
   #tlsuff = 'l1'
   #thsuff = 'h1'
   #dfrac = 0.033557047 # 10/298, 293 and 303 
   tlsuff = 'l2'
   thsuff = 'h2'
   dfrac = 0.067114094 # 20/298, 288 and 308 
   #tlsuff = 'lm'
   #thsuff = 'hm'
   #dfrac = 0.0033557047 # 1/298, 297.5 and 298.5 

# these definitions are for all fd methods, but are needed to load the data.

dt = beta**(-1)*dfrac  # the temperature difference (in energy units, i.e. kB*K)
betatlow = (beta**(-1)-dt/2)**(-1)  # beta of the lower temperature (in units 1/(kB*K))
betathigh = (beta**(-1)+dt/2)**(-1) # beta of the higher temperature (in units 1/(kB*K))
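
# For example, with the harmonic defaults (beta = 0.5, dfrac = 0.01):
#   dt = 0.02, betatlow = 1/1.99 ~= 0.5025, betathigh = 1/2.01 ~= 0.4975,
# i.e. two inverse temperatures bracketing beta, symmetric in T rather than in beta.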


#===================================================================================================
# ALCHEMICAL HELPER FUNCTIONS
#===================================================================================================

def sortbynum(item):
   vals = item.split('.')
   for v in reversed(vals):
      if v.isdigit():
         return int(v)
   print "Error: No digits found in filename, can't sort ", item
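
# For example, sortbynum('BUAM_short_mgibbs.3.dhdl.xvg') (a hypothetical file name
# of the form globbed in load_dhdl below) returns 3, so files sort by their
# numeric index rather than lexicographically.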

def load_dhdl(datafile_directory,datafile_prefix,beta):

  # Alchemical calculation
  # Generate a list of all datafiles with this prefix.

  filenames = commands.getoutput('ls %(datafile_directory)s/%(datafile_prefix)s.*dhdl.xvg' % vars()).split()   
  # Determine number of alchemical intermediates.

  # sort the files numerically.
  filenames.sort(key=sortbynum)

  maxsnapshots = 0
  for file in filenames:

    # Temporarily read the file into memory.
    infile = open(file, 'r')
    lines = infile.readlines()
    infile.close()
   
    # Count the total number of snapshots by quickly parsing each file, ignoring header lines.
    for line in lines:
      if ((line[0] == '#') or (line[0] == '@')):
        continue
      maxsnapshots += 1
   
  
  # Parse all the files; snapshot data accumulates across files, and storage is
  # allocated once, when the first data line of the first file is reached.
  uninitialized = True
  for file in filenames:
    # File to be read
   
    # Read contents of file into memory.
    print "Reading %s..." % file
    infile = open(file, 'r')
    lines = infile.readlines()
    infile.close()

    t = 0
    n_components = 0
    n_states = 0
    bPV = False

    # Parse the file.
    lv = []
    for line in lines:

      # split into elements
      elements = line.split()

      # This section automatically parses the header to count the number
      # of dhdl components, and the number of states at which energies
      # are calculated, and should be modified for different file input formats.
      #                                                                          
      if ((line[0] == '#') or (line[0] == '@')):
         if (line[0] == '@'):
            # it's an xvg legend entry -- load in the information
            if (line[2] == 's'):  
               # it's a xvg entry, and a lambda component, so note it down
               if (re.search('-lambda',line)):     
                  #it's a listing of the lambdas
                  n_components +=1
               elif (re.search("\\\\xD\\\\",line)):
                  lambda_string = elements[5]
                  lambda_list = re.sub('[()\"]','',lambda_string)
                  lambdas = lambda_list.split(',')
                  lv.append(float(lambdas[2]))  # just collecting the vdw lambda
                  n_states += 1
               elif (re.search("pv",line)):
                   bPV = True
      else:                           
         if (uninitialized):     # we don't know the number of components or lambda states until here.
            uninitialized = False
            K = n_states
            # if expanded ensembles, we assume (for memory purposes) that maxsnapshots is N/(K/3) -- that no state
            # has more than three times the average number of samples.  Usually the case.
            maxsnapshots = maxsnapshots/(K/3)
            dhdlt = numpy.zeros([K,n_components,maxsnapshots],numpy.float64) 
            u_klt = numpy.zeros([K,K,maxsnapshots], numpy.float64) # u_klt[k,m,t] is the reduced potential energy of snapshot t of state k evaluated at state m
            nsnapshots = numpy.zeros([K],int)
            pe = numpy.zeros([K],numpy.float64)
            # we're just collecting vdw_dhdl for now, state 2     
            lv = numpy.array(lv)    
            # Load all of the data

         time = float(elements.pop(0))
         k = int(float(elements.pop(0))) # state index, used as an array index

         #
         #  The total energy is printed in the dhdl file; if it is not, delete this line.
         #
         energy = float(elements.pop(0))

         # 
         # In this section, store the derivative with respect to lambda
         # 
      
         for nl in range(n_components):
            dhdlt[k,nl,nsnapshots[k]] = beta*float(elements.pop(0)) # make it dimensionless
         # now record the potential energy differences.   
         for l in range(K):
            pe[l] = float(elements.pop(0)) + energy
                  
         # pressure-volume contribution
         if (bPV):
            pv = float(elements.pop(0))
         else:
            pv = 0

         # compute and store reduced potential energy at each state
         for l in range(K):
            u_klt[k,l,nsnapshots[k]] = beta * (pe[l] + pv)
               
         nsnapshots[k] +=1 

  #===================================================================================================
  # Preliminaries: Subsample data to obtain uncorrelated samples
  #===================================================================================================   
  print "Nstates:",
  print K

  Nequilibrated = max(nsnapshots) - nequil
  ustore = numpy.zeros([K,K,Nequilibrated], numpy.float64) # u_kln[k,m,n] is the reduced potential energy of uncorrelated sample index n from state k evaluated at state m
  dhdlstore = numpy.zeros([K,Nequilibrated], float) # dhdlstore[k,n] is the dhdl value (vdw component only) of uncorrelated sample n from state k
  ulstore = numpy.zeros([K,K,Nequilibrated], numpy.float64) # u_kln[k,m,n] is the reduced potential energy of uncorrelated sample index n from state k evaluated at state m, low temp data
  uhstore = numpy.zeros([K,K,Nequilibrated], numpy.float64) # u_kln[k,m,n] is the reduced potential energy of uncorrelated sample index n from state k evaluated at state m, high temp data

  N_k = numpy.zeros(K, int) # N_k[k] is the number of uncorrelated samples from state k
  g = numpy.zeros(K,float) # autocorrelation times for the data
  for k in range(K):
    # Determine indices of uncorrelated samples from potential autocorrelation analysis at state k.
    # alternatively, could use the energy differences -- here, we will use total dhdl
    dhdl_sum = numpy.sum(dhdlt[k,:,0:nsnapshots[k]],axis=0)
    g[k] = timeseries.statisticalInefficiency(dhdl_sum[nequil:nsnapshots[k]])
    indices = numpy.array(timeseries.subsampleCorrelatedData(dhdl_sum[nequil:nsnapshots[k]],g=g[k])) # indices of uncorrelated samples
    N = len(indices) # number of uncorrelated samples
    N_k[k] = N      
    indices += nequil

    #### for now, assume just vdw components exist.  Makes it easier. ####
    dhdlstore[k,0:N] = dhdlt[k,2,indices]
    for l in range(K):         
       ustore[k,l,0:N] = u_klt[k,l,indices]
  print "Correlation times:"
  print g
  print ""
  print "number of uncorrelated samples:"
  print N_k
  print ""

  return (N_k,lv,ustore,dhdlstore)

#===================================================================================================
# HARMONIC HELPER FUNCTIONS
#===================================================================================================

def dhdl(x,du,S_k,P_k,Sscale,Pscale,Slow,calctype):

   # scale (S_k) and position (P_k) parameters are the input
   # Sscale is the scale of the scaling constant
   # Pscale is the scale of the position constant
   # Slow is the low value of the scaling constant

   K = len(S_k)

   if (calctype == 'harmonic'):

      for k in range(0,K):
         for l in range(0,K):
            # assume O is always linear with zero intercept.
            # For a 1D harmonic oscillator, the potential is given by
            #   V(x;lam) = 0.5*Ss*(Slow+(1-Slow)*lam) * (x-Ps*lam)**2
            # where Ss scales the spring constant and Ps scales the offset.
            # By the chain rule (for now, assuming linear scaling in lambda):
            #   dV/dlam = 0.5*(1-Slow)*Ss*(x-Ps*lam)**2 - Ss*(Slow+(1-Slow)*lam)*Ps*(x-Ps*lam)
            du[k,l,:] = 0.5*(1-Slow)*Sscale*(x[k,:]-P_k[l])**2 - S_k[l]*Pscale*(x[k,:]-P_k[l])

   elif (calctype == 'exponential'):

      for k in range(0,K):
         for l in range(0,K):
            # for the exponential distribution, energies are simply linear:
            #   V(x;L) = L*x = Ls*(Llow+(1-Llow)*lam)*x
            # where L is the exponential scaling constant, so
            #   dV/dlam = Ls*(1-Llow)*x
            du[k,l,:] = (1-Slow)*Sscale*(x[k,:])
      
   return du
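
# A minimal sanity check of the chain rule used in dhdl() above.  It is not
# called by the main script, and the parameter values are arbitrary choices
# for illustration.
def _check_harmonic_dhdl(x=1.3, lam=0.4, Sscale=1.0, Pscale=1.0, Slow=0.1, h=1.0e-6):

   def V(l):
      # V(x;lam) = 0.5*K(lam)*(x-O(lam))**2, with K(lam) = Sscale*(Slow+(1-Slow)*lam)
      # and O(lam) = Pscale*lam
      return 0.5*Sscale*(Slow+(1-Slow)*l)*(x-Pscale*l)**2

   analytic = 0.5*(1-Slow)*Sscale*(x-Pscale*lam)**2 \
       - Sscale*(Slow+(1-Slow)*lam)*Pscale*(x-Pscale*lam)
   numeric = (V(lam+h)-V(lam-h))/(2*h)
   return abs(analytic - numeric)  # should be tiny (finite-difference error only)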

def generate_exponential_samples(x,u,N_k,L_k,beta,Nnoise=0):

   # x and u are storage variables passed in so we don't reallocate or overwrite each time.
   # note -- returns reduced energies!
   # Generate samples.
   # TODO: check betas?

   K = len(N_k) # Get # states from # samples of each state
   for k in range(0,K):

      scale = 1/(beta*L_k[k])
      # generate N_k[k] independent samples with Lambda parameter L_k[k]
      x[k,0:N_k[k]] = numpy.random.exponential(scale, [N_k[k]])
      if (Nnoise > 0):
         noise = scipy.stats.erlang.rvs(Nnoise, size=N_k[k])
      else: 
         noise = 0

      # compute the reduced potential energy of all samples in all potentials
      for l in range(0,K):
         u[k,l,0:N_k[k]] = beta*L_k[l] * x[k,0:N_k[k]] + noise

   return x,u
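
# A minimal sanity check for the exponential sampler above; not called by the
# main script.  For x ~ Exponential(scale=1/(beta*L)) we have <x> = 1/(beta*L),
# so the mean reduced energy <beta*L*x> should approach 1.  The parameter
# values are arbitrary choices for illustration.
def _check_exponential_energies(n=100000, L=2.0, beta=1.0):
   x = numpy.random.exponential(1.0/(beta*L), n)
   return numpy.average(beta*L*x)  # should be close to 1.0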


def generate_harmonic_samples(x,u,N_k,O_k,sigma_k,Nnoise=0):

  # x and u are storage variables passed in so we don't reallocate or overwrite each time.
  # note -- returns reduced energies!
  # Generate samples.
  K = len(N_k) # Get # states from # samples of each state
  K_k = sigma_k**(-2)
  for k in range(0,K):

    # generate N_k[k] independent samples with spring constant K_k[k]
    x[k,0:N_k[k]] = numpy.random.normal(O_k[k], sigma_k[k], [N_k[k]])
    if (Nnoise > 0):
      noise = 0.5*numpy.random.chisquare(Nnoise,size=N_k[k])    
    else: 
      noise = 0

    # compute the reduced potential energy of all samples in all potentials
    for l in range(0,K):
      u[k,l,0:N_k[k]] = (K_k[l]/2.0) * (x[k,0:N_k[k]]-O_k[l])**2 + noise

  return x,u
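
# A minimal sanity check for the harmonic sampler above; not called by the
# main script.  By equipartition, the mean reduced energy of samples evaluated
# at their own state should approach 1/2.  The parameter values are arbitrary
# choices for illustration.
def _check_harmonic_energies(n=100000, O=0.3, sigma=0.7):
   x = numpy.random.normal(O, sigma, n)
   u = 0.5*sigma**(-2)*(x-O)**2  # reduced energy, since sigma**(-2) = beta*K
   return numpy.average(u)  # should be close to 0.5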

#=============================================================================================
# GENERAL HELPER FUNCTIONS
#=============================================================================================

def setReplicate(replicate,type,df,ddf,du,ddu,ds,dds,analytical=False):

   if (analytical):
      df_err = df - Deltaf_ij_analytical
      du_err = du - Deltau_ij_analytical
      ds_err = ds - Deltas_ij_analytical

   # free energy
   key = 'df_' + type
   replicate[key] = df.copy()
   key = 'ddf_' + type
   replicate[key] = ddf.copy()
   if (analytical):
      key = 'df_' + type + '_error'
      replicate[key] = df_err.copy()

   # average energy
   key = 'du_' + type
   replicate[key] = du.copy()
   key = 'ddu_' + type
   replicate[key] = ddu.copy()
   if (analytical):
      key = 'du_' + type + '_error'
      replicate[key] = du_err.copy()

   # entropy
   key = 'ds_' + type
   replicate[key] = ds.copy()
   key = 'dds_' + type
   replicate[key] = dds.copy()
   if (analytical):
      key = 'ds_' + type + '_error'
      replicate[key] = ds_err.copy()
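
# A Monte Carlo sketch (not called by the main script) of the normal-theory
# variance estimate used for ddsdl_k in the TI section of the main loop below:
# for bivariate normal (a,b), var(sample cov(a,b)) ~= (cov(a,b)**2 + var(a)*var(b))/N.
# The sample sizes and the 0.5 coupling are arbitrary choices for illustration.
def _check_cov_variance(N=1000, ntrials=2000):
   covs = numpy.zeros(ntrials, float)
   for t in range(ntrials):
      a = numpy.random.normal(0.0, 1.0, N)
      b = 0.5*a + numpy.random.normal(0.0, 1.0, N)  # cov(a,b) = 0.5, var(b) = 1.25
      covs[t] = numpy.cov(a, b)[0, 1]
   predicted = (0.5**2 + 1.0*1.25)/N
   return numpy.var(covs), predicted  # the two should roughly agree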

def integrateij(dxdl,stddxdl,lambdas=0):

   # dxdl is a list of derivative arrays to integrate

   N = len(dxdl)
   K = len(dxdl[0])  # all entries have the same number of states

   evenSpace = False

   if (numpy.size(lambdas)==1):
      evenSpace = True
      # we assume we have even spacing in lambda
      dl = 1.0/(K-1)
      lambdas = numpy.arange(0,1+0.5/(K-1),dl)

   basef = numpy.zeros([K-1],float)
   for i in range(K-1):
      basef[i] = 0.5*(lambdas[i+1]-lambdas[i])
      
   # we are building a range of weights, for different integration lengths. 
   wk = numpy.zeros([K,K,K],float)

   dx = numpy.zeros([N,K,K],float)
   ddx = numpy.zeros([N,K,K],float)

   for i in range(K):
      for j in range(i):
         l = i-j
         wk[i,j,:] = 0
         if ((l%2)==1 or not(evenSpace)): # odd number of intervals or unequal spacing: use trapezoid
            for k in range(l):          
               wk[i,j,k] += basef[j+k]  
               wk[i,j,k+1] += basef[j+k]
         else:
            # use Simpson's
            wk[i,j,0] = dl/3 
            wk[i,j,l] = dl/3    
            for k in range(1,l):
               if (k%2):
                  wk[i,j,k] = 4*dl/3
               else:
                  wk[i,j,k] = 2*dl/3

   # the '-' sign is needed; it reverses the direction of the free energy differences.
   for i in range(K):
      for j in range(i):
         l = i-j+1
         for n in range(N):
            dx[n,i,j] = -numpy.dot(wk[i,j,0:l],dxdl[n][j:i+1])
            ddx[n,i,j] = numpy.sqrt(numpy.dot(wk[i,j,0:l]**2,stddxdl[n][j:i+1]**2))

   for i in range(K):
      for j in range(i):
         for n in range(N):
            dx[n,j,i] = -dx[n,i,j]
            ddx[n,j,i] = ddx[n,i,j]

   # we are assuming there are three here.  Generalize later.
   return (dx[0,:,:],ddx[0,:,:],dx[1,:,:],ddx[1,:,:],dx[2,:,:],ddx[2,:,:])
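
# A minimal sanity check for integrateij() above; not called by the main
# script.  Integrating a constant unit derivative over lambda in [0,1] must
# give a free energy difference of magnitude 1 between the end states
# (negated by the sign convention inside integrateij).
def _check_integrateij(K=11):
   ones = numpy.ones(K, float)
   zeros = numpy.zeros(K, float)
   (dx, ddx, du, ddu, ds, dds) = integrateij([ones, ones, ones], [zeros, zeros, zeros])
   return dx[K-1, 0]  # should be -1.0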
  
#=============================================================================================
# MAIN
#=============================================================================================

print "Number of replicates:", 
print nreplicates

if (calctype == 'harmonic') or (calctype == 'exponential'):

  K = numpy.size(N_k)
  print "Nstates:",
  print K
  # Determine maximum number of samples to be drawn for any state.
  N_max = numpy.max(N_k)
  Nl_max = N_max
  Nh_max = N_max

  # Determine number of simulations.
  if (calctype == 'harmonic'):
     if numpy.shape(K_k) != numpy.shape(N_k): raise ValueError("K_k and N_k must have same dimensions.")
     if numpy.shape(O_k) != numpy.shape(N_k): raise ValueError("O_k and N_k must have same dimensions.")
  if (calctype == 'exponential'):
     if numpy.shape(L_k) != numpy.shape(N_k): raise ValueError("L_k and N_k must have same dimensions.")

  if (calctype == 'harmonic'):
     # Compute widths of sampled distributions.
     # For a harmonic oscillator with spring constant K,
     # x ~ Normal(x_0, sigma^2), where sigma = 1/sqrt(beta K)

     sigma_k = (beta * K_k)**-0.5
     print "Gaussian widths:"
     print sigma_k

     # Compute the absolute dimensionless free energies of each oscillator analytically.
     # f = - ln(sqrt((2 pi)/(beta K)) )

     print 'Computing dimensionless free energies analytically...'
     f_k_analytical = - numpy.log(numpy.sqrt(2 * numpy.pi) * sigma_k )
     u_k_analytical = 0.5*numpy.ones([K],float)  # by equipartition (note 0.5: (1/2) is zero in Python 2 integer division)
     s_k_analytical = u_k_analytical - f_k_analytical 

  if (calctype == 'exponential'):

     # Compute the absolute dimensionless free energies of each state analytically.
     # Z = \int_0^inf exp(-beta L x) dx = 1/(beta L), so f = -ln Z = ln(beta L)
     # <u> = \int_0^inf (beta L x) exp(-beta L x) (beta L) dx = 1

     print 'Computing dimensionless free energies analytically...'
     f_k_analytical = numpy.log(beta*L_k)
     u_k_analytical = numpy.ones([K],float)  
     s_k_analytical = u_k_analytical - f_k_analytical 
  
if (calctype != 'alchemical'):
   # Compute true free energy differences.
   Deltaf_ij_analytical = numpy.zeros([K,K], dtype = numpy.float64)
   Deltau_ij_analytical = numpy.zeros([K,K], dtype = numpy.float64)
   Deltas_ij_analytical = numpy.zeros([K,K], dtype = numpy.float64)

   for i in range(0,K):
      for j in range(0,K):
         Deltaf_ij_analytical[i,j] = f_k_analytical[j] - f_k_analytical[i]
         Deltau_ij_analytical[i,j] = u_k_analytical[j] - u_k_analytical[i]
         Deltas_ij_analytical[i,j] = s_k_analytical[j] - s_k_analytical[i]
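
   # (equivalently, in vectorized form:
   #  Deltaf_ij_analytical = f_k_analytical[numpy.newaxis,:] - f_k_analytical[:,numpy.newaxis])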

if (calctype == 'harmonic'):
   # DEBUG info
   print "This script will perform %d replicates of an experiment where samples are drawn from %d harmonic oscillators." % (nreplicates, K)
   print "The harmonic oscillators have equilibrium positions"
   print O_k
   print "and spring constants"
   print K_k
   print "and the following number of samples will be drawn from each (can be zero if no samples drawn):"
   print N_k
   print ""
   
elif (calctype == 'exponential'):
   print "This script will perform %d replicates of an experiment where samples are drawn from %d exponential distributions." % (nreplicates, K)
   print "The exponential distributions have rate parameters"
   print L_k
   print "and the following number of samples will be drawn from each (can be zero if no samples drawn):"
   print N_k
   print ""

elif (calctype == 'alchemical'):
   # load alchemical files
   (N_k,lv,ustore,dhdlstore) = load_dhdl(datafile_directory,datafile_prefix,beta) 
   K = numpy.shape(ustore)[0]
   N_max = numpy.max(N_k)
   
   if (dofdalchemical):
      
      datafile_prefix_lowt = datafile_prefix + tlsuff
      # load lower temperature information
      (Nl_k,lv,ulstore,dhdl_tmp) = load_dhdl(datafile_directory,datafile_prefix_lowt,betatlow)
      Nl_max = numpy.max(Nl_k)

      datafile_prefix_hight = datafile_prefix + thsuff
      # load higher temperature information
      (Nh_k,lv,uhstore,dhdl_tmp) = load_dhdl(datafile_directory,datafile_prefix_hight,betathigh)
      Nh_max = numpy.max(Nh_k)

# data all loaded

# Conduct a number of replicates of the same experiment
replicates = [] # storage for one hash for each replicate
# Initialize storage for samples.
u_kln = numpy.zeros([K,K,N_max], dtype = numpy.float64) # u_kln[k,l,n] is the reduced energy of snapshot n from simulation k evaluated at potential l
dhdl_kn = numpy.zeros([K,N_max], dtype = numpy.float64) # dhdl_kn[k,n] is the energy derivative of snapshot n from simulation k
uh_kln = numpy.zeros([K,K,Nh_max], dtype = numpy.float64) # uh_kln[k,l,n] is the reduced energy of snapshot n from simulation k evaluated at potential l, higher temperature
ul_kln = numpy.zeros([K,K,Nl_max], dtype = numpy.float64) # ul_kln[k,l,n] is the reduced energy of snapshot n from simulation k evaluated at potential l, lower temperature

if (calctype != 'alchemical'):
   x_kn = numpy.zeros([K,N_max], dtype = numpy.float64)  # x_kn[k,n] is the coordinate x of independent snapshot n of simulation k
   dhdl_kln = numpy.zeros([K,K,N_max], dtype = numpy.float64) # dhdl_kln[k,l,n] is the energy derivative of snapshot n from simulation k, evaluated at potential l
   # alternate coordinate storage for the finite-difference temperature estimates
   xh_kn = numpy.zeros([K,N_max], dtype = numpy.float64) # xh_kn[k,n] is the coordinate x of independent snapshot n of simulation k, higher temperature
   xl_kn = numpy.zeros([K,N_max], dtype = numpy.float64) # xl_kn[k,n] is the coordinate x of independent snapshot n of simulation k, lower temperature

for replicate_index in range(0,nreplicates):
  print "Performing replicate %d / %d" % (replicate_index+1, nreplicates)

  replicate = {}
  #=============================================================================================
  # Generate independent data samples from the K states (or bootstrap samples, in the alchemical case).
  #=============================================================================================
  
  if (calctype == 'harmonic'):

     print 'generating samples...'

     # Generate samples and reduced potential energies.
     (x_kn,u_kln) = generate_harmonic_samples(x_kn,u_kln,N_k,O_k,sigma_k,Nnoise)

     # generate derivatives
     dhdl_kln = beta*dhdl(x_kn,dhdl_kln,K_k,O_k,Kscale,Oscale,klow,calctype)
     # for now, just deal with the dhdl at the current lambda
     for k in range(K):
        dhdl_kn[k,:] = dhdl_kln[k,k,:]

  elif (calctype == 'exponential'):

     print 'generating samples...'

     # Generate samples and reduced potential energies.
     (x_kn,u_kln) = generate_exponential_samples(x_kn,u_kln,N_k,L_k,beta,Nnoise)

     # generate derivatives
     dhdl_kln = beta*dhdl(x_kn,dhdl_kln,L_k,numpy.zeros(K,float),Lscale,1,Llow,calctype)
     # for now, just deal with the dhdl at the current lambda
     for k in range(K):
        dhdl_kn[k,:] = dhdl_kln[k,k,:]

  elif (calctype == 'alchemical'):

     print 'generating bootstrap samples...'
     # generate bootstrapped energies and dhdl values
     for k in range(K):
        if (nreplicates == 1): # if only one replicate, we want the full data.
           permut = range(N_k[k])
        else:
           permut = numpy.random.randint(0,N_k[k],N_k[k])
        for l in range(K):
           u_kln[k,l,0:N_k[k]] = ustore[k,l,permut]
        dhdl_kn[k,0:N_k[k]] = dhdlstore[k,permut]
  
  else:
     print "No such type of calculation"

  #=============================================================================================
  # Estimate free energies and expectations.
  #=============================================================================================

  # Estimate free energies from simulation using MBAR.
  print "Estimating relative free energies from simulation (this may take a while)..."

  # Initialize the MBAR class, determining the free energies.
  mbar = pymbar.MBAR(u_kln, N_k, method=mbar_method,relative_tolerance=1.0e-10,verbose=verbose) # solver chosen by mbar_method above
  (a, b) = mbar.getFreeEnergyDifferences()
  # let's save the f_k to initialize other mbar states, which will all be relatively similar, 
  # to make it faster to converge to the final answer
  initial_f_k = mbar.f_k
  #=============================================================================================
  # Compute entropies and enthalpies with MBAR
  #=============================================================================================

  # Get matrix of dimensionless free energy differences and uncertainty estimate plus entropies and enthalpies
  (Deltaf_ij_estimated, dDeltaf_ij_estimated,Deltau_ij_estimated, dDeltau_ij_estimated,Deltas_ij_estimated, dDeltas_ij_estimated) = mbar.computeEntropyAndEnthalpy() 

  setReplicate(replicate,'mbar',Deltaf_ij_estimated,dDeltaf_ij_estimated,Deltau_ij_estimated,dDeltau_ij_estimated,Deltas_ij_estimated,dDeltas_ij_estimated,calctype!='alchemical')

  #=============================================================================================
  # Compute entropies and enthalpies with endpoints, using the full free energy difference from MBAR.
  #=============================================================================================

  # endpoint estimate of energies.
  u_k_estimated = numpy.zeros([K],numpy.float64) 
  du_k_estimated = numpy.zeros([K],numpy.float64) 
  
  # 'standard' expectation averages
  for k in range(K):
    u_k_estimated[k] = numpy.average(u_kln[k,k,0:N_k[k]])
    du_k_estimated[k]  = numpy.sqrt(numpy.var(u_kln[k,k,0:N_k[k]])/(N_k[k]-1))

  for i in range(K):
    for j in range(K):
      Deltau_ij_estimated[i,j] = u_k_estimated[j] - u_k_estimated[i] 
      dDeltau_ij_estimated[i,j] = numpy.sqrt(du_k_estimated[j]**2 + du_k_estimated[i]**2)
      # these uncertainties aren't quite right, since the endpoint averages are correlated;
      # but there is no way to estimate the correlation without actually doing MBAR.
      # s = u - f 
      Deltas_ij_estimated[i,j] = Deltau_ij_estimated[i,j] - replicate['df_mbar'][i,j]
      dDeltas_ij_estimated[i,j] = numpy.sqrt(dDeltau_ij_estimated[i,j]**2 + replicate['ddf_mbar'][i,j]**2)

  setReplicate(replicate,'endpoint',Deltaf_ij_estimated,dDeltaf_ij_estimated,Deltau_ij_estimated,dDeltau_ij_estimated,Deltas_ij_estimated,dDeltas_ij_estimated,calctype!='alchemical')

  #=============================================================================================
  # Compute free energies, entropies, and enthalpies with TI (single state averages, not MBAR averages)
  #=============================================================================================
  # assume trapezoidal rule

  dfdl_k = numpy.zeros([K],numpy.float64) 
  ddfdl_k = numpy.zeros([K],numpy.float64) 
  dudl_k = numpy.zeros([K],numpy.float64) 
  ddudl_k = numpy.zeros([K],numpy.float64) 
  dsdl_k = numpy.zeros([K],numpy.float64) 
  ddsdl_k = numpy.zeros([K],numpy.float64) 
  u_k = numpy.zeros([K],numpy.float64) 
  du_k = numpy.zeros([K],numpy.float64) 

  for k in range(K):

     # computing dhdl
     dfdl_k[k] = numpy.average(dhdl_kn[k,0:N_k[k]])
     ddfdl_k[k]  = numpy.sqrt(numpy.var(dhdl_kn[k,0:N_k[k]])/(N_k[k]-1))

     # averages for computation of energy and entropy
     u_k[k] = numpy.average(u_kln[k,k,0:N_k[k]])
     du_k[k] = numpy.sqrt(numpy.var(u_kln[k,k,0:N_k[k]])/(N_k[k]-1)) 

     covM = -numpy.cov(dhdl_kn[k,0:N_k[k]],u_kln[k,k,0:N_k[k]])
     dsdl_k[k] = covM[0,1]
     # three possibilities for the variance.
     # 1) Just ignore the variance in the averages in cov = <(a-<a>)(b-<b>)>, as those terms scale an order N lower (I think!)
     #    This doesn't work all that well, unfortunately.
     #    ddsdl_k[k] = numpy.sqrt(numpy.var(u_kln[k,k,0:N_k[k]]*dhdl_kn[k,0:N_k[k]])/(N_k[k]-1))

     # 2) current use: we use the variance of the Wishart distribution, i.e. the variance of the
     # sample covariance of two normal distributions, so we assume u and dhdl are normally distributed.
     # Not perfect, but appears to work very well even for large amounts of noise
     # (see the _check_cov_variance sketch among the helper functions above).
     ddsdl_k[k] = numpy.sqrt((covM[0,1]**2+covM[1,1]*covM[0,0])*N_k[k]**(-1))

     
     dudl_k[k] =  dfdl_k[k] + dsdl_k[k] 

     # 1) for average energies: just ignore the variance in the averages in cov = <(a-<a>)(b-<b>)>, as those terms scale an order N lower (I think!)
     # Again, ends up being not so good for large noise
     # ddudl_k[k] = numpy.sqrt(numpy.var((1-u_kln[k,k,0:N_k[k]])*dhdl_kn[k,0:N_k[k]])/(N_k[k]-1))

     # 2) Wishart distribution: not perfect, but appears to work pretty well for large noise.
     # More simply:  var(a + cov(a,b)) = var(a) + var(cov(a,b)) - 2cov(a,ab)
     covM1 = numpy.cov(dhdl_kn[k,0:N_k[k]]*u_kln[k,k,0:N_k[k]],dhdl_kn[k,0:N_k[k]])
     ddudl_k[k] = numpy.sqrt(ddsdl_k[k]**2 + ddfdl_k[k]**2 - 2*(covM1[0,1]/N_k[k]))

     # 3) try to calculate the whole chain of covariance.  This basically doesn't work; for large noise,
     # It's completely off.  Doing something wrong here; not sure if the ideas are wrong, or it's a typo.
     # We need the variance in the sum sample covariance estimator.  Can't find online anywhere, will rederive
     #
     # Take only terms in N^(-1) or higher.
     #
     # need var(sum(a_ib_i)/N - sum(a_i)sum(b_j)/N^2)
     #     =    var(sum_i(a_ib_i)/N)                           (1)
     #       +  var(sum_i(a_i)sum_i(b_j)/N^2)                  (2)
     #       - 2cov(sum_i(a_ib_i)/N,sum_i(a_i)sum_j(b_j)/N^2) (3)
     # (1) = N^(-1)var(ab) 
     # (2) = var(sum(a_i)sum(b_j)/N^2)
     #     = var(sum_i(a_ib_i)/N^2) + var(sum_i sum_{i ne j} (a_i b_j/N^2)) 
     #     = N^(-3) var(ab)         +      0 (since a_i and b_j are uncorrelated if i ne j)
     # (3) = cov(sum_i(a_ib_i)/N,sum_i(a_i)sum_j(b_j)/N^2)
     #     =  sum_i sum_j sum_k N^(-3) cov(a_i b_i,a_j b_k)
     #      + sum_i N^(-3) cov(a_i b_i,a_i b_i)   i=j=k 
     #      + sum_i sum_k N^(-3) cov(a_ib_i,a_i b_k)   i=j ne k 
     #      + sum_i sum_j N^(-3) cov(a_ib_i,a_j b_i)   i=k ne j 
     #      + sum_i sum_j N^(-3) cov(a_ib_i,a_j b_k)  i ne j ne k, term = 0 since cov(a_i b_i, a_j b_k) = 0
     #     =  N^(-2) var(ab) + N^(-1) <b> cov(ab,a) + N^(-1) <a> cov(ab,b)
     #  Total:
     #     =  N^(-1)var(ab) - 2[N^(-1) <b> cov(ab,a) + N^(-1) <a> cov(ab,b)]
     #     =  N^(-1)[var(ab) - 2<b>cov(ab,a) - 2<a>cov(ab,b)]
     #               a = -dudl  b = u
     #     =  N^(-1)[var(u*dudl) - 2<u>cov(u*dudl,dudl) - 2<dudl>cov(u*dudl,u)]
     #
     #covaba = numpy.cov(dhdl_kn[k,0:N_k[k]]*u_kln[k,k,0:N_k[k]],dhdl_kn[k,0:N_k[k]])[0,1]
     #covabb = numpy.cov(dhdl_kn[k,0:N_k[k]]*u_kln[k,k,0:N_k[k]],u_kln[k,k,0:N_k[k]])[0,1]
     #var1_dds = numpy.var(u_kln[k,k,0:N_k[k]]*dhdl_kn[k,0:N_k[k]])
     #var2_dds = dfdl_k[k]*covabb
     #var3_dds = du_k[k]*covaba
     #
     #ddsdl_k[k] =  numpy.sqrt((var1_dds-2*(var2_dds+var3_dds))*N_k[k]**(-1))
     #
     # This basically doesn't work.  Doing something wrong here.
     # var(d<u>dl) = var(<dudl> + <dudl*u> - <dudl><u>)
     #             = var[ sum_i (dudl_i/N) + sum_i (dudl_i u_i/N) - sum_i (dudl_i/N) sum_j (u_j/N)]
     #             = var[ sum_i (a_i/N) + sum_i (a_i b_i/N) - sum_i (a_i/N) sum_j (b_j/N)]
     #             = var[ sum_i (a_i/N)] + var[sum_i (a_i b_i/N)] + var[sum_i (a_i/N) sum_j (b_j/N)]
     #              + 2 cov(sum_i (a_i/N), sum_i (a_i b_i/N))
     #              - 2 cov(sum_i (a_i/N), sum_i (a_i/N) sum_j (b_j/N))
     #              - 2 cov(sum_i (a_i b_i/N), sum_i (a_i/N) sum_j (b_j/N))
     #             = N^(-1) [var(a) + var(ab) - 2<b>cov(ab,a) - 2<a>cov(ab,b)] 
     #              + 2 cov(sum_i (a_i/N), sum_i (a_i b_i/N))     
     #              - 2 cov(sum_i (a_i/N), sum_i (a_i/N) sum_j (b_j/N))     
     # cov(sum_i (a_i/N), sum_i (a_i b_i/N)) = 
     #             = N^(-2) sum_i sum_j cov(a_i,a_j b_j)  (cross terms vanish)
     #             = N^(-2) sum_i cov(a_i,a_i b_i)  (i=j)
     #             = N^(-1) cov(ab,a) 
     # cov(sum_i (a_i/N), sum_i (a_i/N) sum_j (b_j/N)) = 
     #             = N^(-3) sum_i sum_j sum_k cov(a_i,a_jb_k) 
     #             = N^(-3) sum_i cov(a_i,a_ib_i)      i=j=k               
     #             = N^(-2) cov(a,ab)                  
     #             = N^(-2) sum_i sum_k cov(a_i,a_i b_k)      i=j ne k               
     #             = N^(-1) <b>var(a) 
     #             = N^(-2) sum_i sum_j cov(a_i,a_j b_i)      i=k ne j
     #             = N^(-1) <a>cov(a,b) 
     # Totals: var(<a*b> - <a><b> + <a>) = 
     #         var(<a*b> - <a><b>)  (from dsdl computation) 
     #         + N^(-1)[var(a) + 2cov(ab,a) - 2<b>var(a) - 2<a>cov(a,b)]  
     #         = var(<a*b> - <a><b>) + N^(-1)[(1-2<b>)var(<a>) + 2cov(ab,a) - 2<a>cov(a,b)]  
     #         = var(<a*b> - <a><b>) + (1-2<b>)var(<a>) + 2N^(-1)[cov(ab,a) - <a>cov(a,b)]  
     #                     a = -dudl, b = u, cov(a,b) = -dsdl 
     #         = var(dsdl) + (1-2<u>)var(dudl) + 2N^(-1)[cov(u*dudl,dudl) - <dudl><dsdl>]  
     #ddudl_k[k] = numpy.sqrt(ddsdl_k[k]**2 + (1-2*u_k[k])*ddfdl_k[k]**2 + 2*N_k[k]**(-1)*(covaba - dfdl_k[k]*dsdl_k[k]))


  (Deltaf_ij_estimated, dDeltaf_ij_estimated,Deltau_ij_estimated, dDeltau_ij_estimated,Deltas_ij_estimated, dDeltas_ij_estimated) = integrateij([dfdl_k,dudl_k,dsdl_k],[ddfdl_k,ddudl_k,ddsdl_k],lv)

  setReplicate(replicate,'ti_single',Deltaf_ij_estimated,dDeltaf_ij_estimated,Deltau_ij_estimated,dDeltau_ij_estimated,Deltas_ij_estimated,dDeltas_ij_estimated,calctype!='alchemical')

  #=============================================================================================
  # Compute free energies, entropies, and enthalpies with TI (multistate averages)
  #=============================================================================================

  # VARIANCE ESTIMATE STILL NEEDS WORK
  # in theory, we can compute with the value of dhdl at many states.  However, there appear to be errors that
  # might involve the MBAR computeExpectations for multiple observables.  Need to investigate.

  if (calctype != 'alchemical'):
    # Just ignore the variance in the averages in cov = <(a-<a>)(b-<b>)>, as those terms scale an order N lower (I think!)
    (dfdl_k,ddfdl_k) = mbar.computeExpectations(dhdl_kln)
    (u_k,du_k) = mbar.computeExpectations(u_kln) # for calculation of dsdl and dudl

    (dsdl_k,ddsdl_k) = mbar.computeExpectations(u_kln*(-dhdl_kln)) 
    dsdl_k += (u_k*dfdl_k)

    # need a better estimator for the covariance; the current one fails at high noise levels!
    # should be able to do it with computeMultipleExpectations, if it's working.
  
    (dudl_k,ddudl_k) = mbar.computeExpectations((1-u_kln)*(dhdl_kln))  
    dudl_k += (u_k*dfdl_k)
 
    (Deltaf_ij_estimated, dDeltaf_ij_estimated,Deltau_ij_estimated, dDeltau_ij_estimated,Deltas_ij_estimated, dDeltas_ij_estimated) = integrateij([dfdl_k,dudl_k,dsdl_k],[ddfdl_k,ddudl_k,ddsdl_k],lv)

    # note: the analytical uncertainty might have problems -- perhaps we cannot neglect the uncertainties in the averages?  Though there are problems for df as well.
    setReplicate(replicate,'ti_multi',Deltaf_ij_estimated,dDeltaf_ij_estimated,Deltau_ij_estimated,dDeltau_ij_estimated,Deltas_ij_estimated,dDeltas_ij_estimated,calctype!='alchemical')
  
  #===================================================================================================================
  # Compute entropies with temperature finite difference derivatives, using MBAR to compute energies at different T
  #===================================================================================================================

  # divide by beta at the temperature the data was generated at to get non-reduced free energies, 
  # then take the derivative wrt temperature, then convert back to reduced quantities at the original beta

  wl = (beta/betatlow)  # unitless
  wh = (beta/betathigh) # unitless
  dwl = 1/(dt*betatlow)  # unitless
  dwh = 1/(dt*betathigh) # unitless

  upert = numpy.zeros([K,3*K,N_max])
  upert[:,0:K,:] = u_kln.copy()  
  upert[:,K:2*K,:] = wl**(-1) * upert[:,0:K,:]  
  # Perturbed state has lower temperature; _energy_ does not change, but reduced energy becomes higher.
  upert[:,2*K:3*K,:] = wh**(-1) * upert[:,0:K,:]  
  # Perturbed state has higher temperature; _energy_ does not change, but reduced energy becomes lower
  (Deltaf_ij_pert,dDeltaf_ij_pert) = mbar.computePerturbedFreeEnergies(upert)
  var_ij = dDeltaf_ij_pert**2

  init_pert_all = Deltaf_ij_pert[0:3*K,0]   # save for mbarall starting location
  
  # now compute the free energy differences:
  Deltaf_ij_estimated = replicate['df_mbar']
  dDeltaf_ij_estimated = replicate['ddf_mbar']

  # convert from dimensionless to dimensional free energy, divide by dt, and convert back to dimensionless
  # Note: reduced S is beta (TS) = S/kB
  Deltas_ij_estimated = dwl*Deltaf_ij_pert[K:2*K,K:2*K] - dwh*Deltaf_ij_pert[2*K:3*K,2*K:3*K]
  Deltau_ij_estimated = replicate['df_mbar'] + Deltas_ij_estimated

  # uncertainty in s is var(a_h(f_i^h - f_j^h) - a_l(f_i^l - f_j^l)) = 
  # a_h^2 var(f_i^h - f_j^h) + a_l^2 var(f_i^l - f_j^l) - 2 a_h a_l cov(f_i^h - f_j^h, f_i^l - f_j^l)
  # cov(f_i^h - f_j^h, f_i^l - f_j^l) = cov(f_i^h,f_i^l) - cov(f_i^h,f_j^l) - cov(f_j^h,f_i^l) + cov(f_j^h,f_j^l) 
  #
  # We want cov(a-b,c-d) = cov(a,c)-cov(a,d)-cov(b,c)+cov(b,d)
  #
  # since var(x-y) = var(x)+var(y) - 2cov(x,y)
  #
  #       Then:
  #
  #     cov(a,c)   =      -0.5*var(a-c) + 0.5*var(a) + 0.5var(c)
  #    -cov(a,d)   =       0.5*var(a-d) - 0.5*var(a) - 0.5var(d)
  #    -cov(b,c)   =       0.5*var(b-c) - 0.5*var(b) - 0.5var(c)
  #     cov(b,d)   =      -0.5*var(b-d) + 0.5*var(b) + 0.5var(d)
  #  -------------------------------------------------------------
  #   cov(a,c)-cov(a,d)-cov(b,c)+cov(b,d) = 0.5( -var(a-c) + var(a-d) + var(b-c) - var(b-d))
  #
  #  So:
  # 
  #  -2cov(a-b,c-d)   =  var(a-c) - var(a-d) - var(b-c) + var(b-d)
  #
  #  So:
  #
  # var(a_h(f_i^h - f_j^h) - a_l(f_i^l - f_j^l)) =
  #
  # = a_h^2 var(f_i^h - f_j^h) + a_l^2 var(f_i^l - f_j^l) - 2a_h a_l cov(f_i^h - f_j^h, f_i^l - f_j^l)
  # = a_h^2 var(f_i^h - f_j^h) + a_l^2 var(f_i^l - f_j^l) + a_h a_l [var(f_i^h - f_i^l) - var(f_i^h - f_j^l) - var(f_j^h - f_i^l) + var(f_j^h - f_j^l)]

  dDeltas_ij_estimated = numpy.zeros([K,K],dtype=numpy.float64)
  for i in range(K):
    for j in range(K):
      dDeltas_ij_estimated[i,j] = (dwl**2)*var_ij[i+K,j+K] + (dwh**2)*var_ij[i+2*K,j+2*K] + \
          dwl*dwh*(var_ij[i+2*K,i+K]-var_ij[i+2*K,j+K]-var_ij[j+2*K,i+K]+var_ij[j+2*K,j+K]) 
  dDeltas_ij_estimated = numpy.sqrt(dDeltas_ij_estimated)

  # given we haven't sampled any different information, the best possible reweighting estimate is the MBAR estimate.

  Deltau_ij_estimated = replicate['du_mbar']
  dDeltau_ij_estimated = replicate['ddu_mbar']

  setReplicate(replicate,'fd_fep',Deltaf_ij_estimated,dDeltaf_ij_estimated,Deltau_ij_estimated,dDeltau_ij_estimated,Deltas_ij_estimated,dDeltas_ij_estimated,calctype!='alchemical')

  #=============================================================================================
  # Compute entropies with temperature finite difference derivatives from direct sampling
  #=============================================================================================

  # Can only be done when we can generate (or bootstrap) samples at the shifted temperatures.

  if (calctype != 'alchemical' or dofdalchemical):
     if (calctype == 'harmonic'):                          

        print 'generating low t samples...'
        sigmat_k = (betatlow * K_k)**-0.5

        (xl_kn,ul_kln) = generate_harmonic_samples(xl_kn,ul_kln,N_k,O_k,sigmat_k,Nnoise)
        Nl_k = N_k

     if (calctype == 'exponential'):
        (xl_kn,ul_kln) = generate_exponential_samples(xl_kn,ul_kln,N_k,L_k,betatlow,Nnoise)
        Nl_k = N_k

     if (calctype == 'alchemical'):
        for k in range(K):
           if (nreplicates == 1): # if only one replicate, we want the full data.
              permut = range(Nl_k[k])
           else:
              permut = numpy.random.randint(0,Nl_k[k],Nl_k[k])
           for l in range(K):
              ul_kln[k,l,0:Nl_k[k]] = ulstore[k,l,permut]

     # Initialize the MBAR class, determining the free energies.
     mbarl = pymbar.MBAR(ul_kln, Nl_k, method=mbar_method, relative_tolerance=1.0e-11, initial_f_k=initial_f_k, verbose=verbose)
     (Deltaf_ij_low, dDeltaf_ij_low) = mbarl.getFreeEnergyDifferences()
     
     if (calctype == 'harmonic'):                          
        print 'generating high t samples...'
        sigmat_k = (betathigh * K_k)**-0.5

        (xh_kn,uh_kln) = generate_harmonic_samples(xh_kn,uh_kln,N_k,O_k,sigmat_k,Nnoise)
        Nh_k = N_k

     if (calctype == 'exponential'):
        (xh_kn,uh_kln) = generate_exponential_samples(xh_kn,uh_kln,N_k,L_k,betathigh,Nnoise)
        Nh_k = N_k

     if (calctype == 'alchemical'):   
        # generate bootstrapped energies and dhdl values                                                                         
        for k in range(K):
           if (nreplicates == 1): # if only one replicate, we want the full data.
              permut = range(Nh_k[k])
           else:
              permut = numpy.random.randint(0,Nh_k[k],Nh_k[k])
           for l in range(K):
              uh_kln[k,l,0:Nh_k[k]] = uhstore[k,l,permut]

     # Initialize the MBAR class, determining the free energies.
     mbarh = pymbar.MBAR(uh_kln, Nh_k, method=mbar_method, relative_tolerance=1.0e-11, initial_f_k=initial_f_k,verbose=verbose)
     (Deltaf_ij_high, dDeltaf_ij_high) = mbarh.getFreeEnergyDifferences()

     Deltas_ij_estimated = dwl*Deltaf_ij_low - dwh*Deltaf_ij_high
     dDeltas_ij_estimated = numpy.sqrt((dwl*dDeltaf_ij_low)**2 + (dwh*dDeltaf_ij_high)**2)
     
     Deltaf_ij_estimated = replicate['df_mbar']
     dDeltaf_ij_estimated = replicate['ddf_mbar']
     
     Deltau_ij_estimated = replicate['du_mbar']
     dDeltau_ij_estimated = replicate['ddu_mbar']
     
     setReplicate(replicate,'fd',Deltaf_ij_estimated,dDeltaf_ij_estimated,Deltau_ij_estimated,dDeltau_ij_estimated,Deltas_ij_estimated,dDeltas_ij_estimated,calctype!='alchemical')
     
  #===================================================================================================
  # Compute entropies with temperature finite difference derivatives from direct sampling, using MBAR!
  #===================================================================================================

  #for now, this _assumes_ that 'fd' has already been run, so that different temperature samples don't  
  # need to be generated.

  if (calctype != 'alchemical' or dofdalchemical):
     N_pert = numpy.zeros([3*K],int)
     N_pert[0:K] = N_k
     N_pert[K:2*K] = Nl_k
     N_pert[2*K:3*K] = Nh_k
     N_pert_max = numpy.max(N_pert)
  
     # need to fill in all the 9 quadrants to get the right free energy!
     upert = numpy.zeros([3*K,3*K,N_pert_max])

     upert[0:K,0:K,0:numpy.max(N_k)] = u_kln[:,:,0:numpy.max(N_k)]
     upert[0:K,K:2*K,0:numpy.max(N_k)] = wl**(-1)*u_kln[:,:,0:numpy.max(N_k)]
     upert[0:K,2*K:3*K,0:numpy.max(N_k)] = wh**(-1)*u_kln[:,:,0:numpy.max(N_k)]
     
     upert[K:2*K,0:K,0:numpy.max(Nl_k)] = wl*ul_kln[:,:,0:numpy.max(Nl_k)]
     upert[K:2*K,K:2*K,0:numpy.max(Nl_k)] = ul_kln[:,:,0:numpy.max(Nl_k)]
     upert[K:2*K,2*K:3*K,0:numpy.max(Nl_k)] = (wl/wh)*ul_kln[:,:,0:numpy.max(Nl_k)]

     upert[2*K:3*K,0:K,0:numpy.max(Nh_k)] = (wh)*uh_kln[:,:,0:numpy.max(Nh_k)]
     upert[2*K:3*K,K:2*K,0:numpy.max(Nh_k)] = (wh/wl)*uh_kln[:,:,0:numpy.max(Nh_k)]
     upert[2*K:3*K,2*K:3*K,0:numpy.max(Nh_k)] = uh_kln[:,:,0:numpy.max(Nh_k)]
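
     # These scalings follow from u = beta*U: a snapshot with reduced energy u at
     # the row inverse temperature beta_row has reduced energy (beta_col/beta_row)*u
     # when re-evaluated at the column inverse temperature, e.g. wl*ul_kln = (beta/betatlow)*ul_kln.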

     mbart = pymbar.MBAR(upert, N_pert, method=mbar_method,relative_tolerance=1.0e-10,initial_f_k=init_pert_all,verbose=verbose)

     (Deltaf_ij_trange,dDeltaf_ij_trange) = mbart.getFreeEnergyDifferences()

     # now compute the free energy differences:
     Deltaf_ij_estimated = Deltaf_ij_trange[0:K,0:K]  # will be even more accurate than the results from MBAR alone.
     dDeltaf_ij_estimated = dDeltaf_ij_trange[0:K,0:K]
     
     # first term: reduced free energy at the lower temperature; divide by betatlow to get the free energy at the low temperature,
     # divide by T, then multiply by beta
     # second term: reduced free energy at the higher temperature; divide by betathigh to get the free energy at the high temperature,
     # divide by T, then multiply by beta

     Deltas_ij_estimated = dwl*Deltaf_ij_trange[K:2*K,K:2*K] - dwh*Deltaf_ij_trange[2*K:3*K,2*K:3*K]
     Deltau_ij_estimated = replicate['df_mbar'] + Deltas_ij_estimated

     dDeltas_ij_estimated = numpy.zeros([K,K],dtype=numpy.float64)
     dDeltau_ij_estimated = numpy.zeros([K,K],dtype=numpy.float64)
     var_ij = dDeltaf_ij_trange**2
                     
     # uncertainty in s is var(a_h(f_i^h - f_j^h) - a_l(f_i^l - f_j^l)) = 
     # same uncertainty calculation as with fd_fep

     for i in range(K):
        for j in range(K):
           dDeltas_ij_estimated[i,j] = (dwl**2)*var_ij[i+K,j+K] + (dwh**2)*var_ij[i+2*K,j+2*K] + \
           dwl*dwh*(var_ij[i+2*K,i+K]-var_ij[i+2*K,j+K]-var_ij[j+2*K,i+K]+var_ij[j+2*K,j+K])  
     dDeltas_ij_estimated = numpy.sqrt(dDeltas_ij_estimated)

     # calculate best estimate dU directly from all of the information.  
     # Don't need to pass all 3x3 squares in, but easier to do it this way.
     (du,ddu) = mbart.computeExpectations(upert,output = 'differences')

     Deltau_ij_estimated = du[0:K,0:K]   
     dDeltau_ij_estimated = ddu[0:K,0:K]   

     setReplicate(replicate,'fd_mbar',Deltaf_ij_estimated,dDeltaf_ij_estimated,Deltau_ij_estimated,dDeltau_ij_estimated,Deltas_ij_estimated,dDeltas_ij_estimated,calctype!='alchemical')

  #===================================================================================================
  # Instead of finite differences, just compute the entropy at a single temperature directly using all three temperature info
  #===================================================================================================

  if (calctype != 'alchemical' or dofdalchemical):

     (df,ddf,du,ddu,ds,dds) = mbart.computeEntropyAndEnthalpy()
  
     Deltaf_ij_estimated = df[0:K,0:K]
     dDeltaf_ij_estimated = ddf[0:K,0:K]
     Deltau_ij_estimated = du[0:K,0:K]
     dDeltau_ij_estimated = ddu[0:K,0:K]
     Deltas_ij_estimated = ds[0:K,0:K]
     dDeltas_ij_estimated = dds[0:K,0:K]
     setReplicate(replicate,'mbarall',Deltaf_ij_estimated,dDeltaf_ij_estimated,Deltau_ij_estimated,dDeltau_ij_estimated,Deltas_ij_estimated,dDeltas_ij_estimated,calctype!='alchemical')

  #=============================================================================================
  # Store data for this replicate.
  #=============================================================================================  
  replicates.append(replicate)

if (calctype != 'alchemical'):
  strs = ('mbar','endpoint','ti_single','ti_multi','fd_fep','fd','fd_mbar','mbarall')
elif (dofdalchemical):
  strs = ('mbar','endpoint','ti_single','fd_fep','fd','fd_mbar','mbarall')
else:
  strs = ('mbar','endpoint','ti_single','fd_fep')

vals = numpy.zeros(nreplicates,float)
stds = numpy.zeros(nreplicates,float)
errs = numpy.zeros(nreplicates,float)
das  = ['df','ds','du']
if (calctype != 'alchemical'):
  print "#Analytical df: %10.6f ds: %10.6f du: %10.6f" % (Deltaf_ij_analytical[0,K-1],Deltas_ij_analytical[0,K-1],Deltau_ij_analytical[0,K-1])
  print "#Nall: %d  nreplicates: %d  noise: %d" % (nall, nreplicates,Nnoise) 
print "             df: smpl_mean smpl_std analyt_std  ds: smpl_mean smpl_std analyt_std  du: smpl_mean smpl_std analyt_std"

for istr in strs:
   print "%-10s" % istr,
   for da in das:  
      for index,replicate in enumerate(replicates):
         key = da + '_' + istr
         vals[index] = numpy.array(replicate[key])[0][K-1]
         key = 'd'+ da + '_' + istr
         stds[index] = numpy.array(replicate[key])[0][K-1]
         if (calctype != 'alchemical'):
            key = da + '_' + istr + '_error'
            errs[index] = numpy.array(replicate[key])[0][K-1]
      
      sample_std = numpy.std(vals,ddof=1) 
      sample_mean = numpy.average(vals)
      analytical_std = numpy.average(stds)
      # currently, all results in reduced units.
      print "    %10.5f%10.5f%10.5f" % (sample_mean,sample_std,analytical_std), 


   print ""
