"""
A library of functions to find and process reaction coordinates
and kinetic metrics
"""

import os
import sys
import glob

import cvxopt
from cvxopt.solvers import qp
import numpy as np
import matplotlib.pyplot as plt

from msmbuilder import Serializer


class metric():
    """ A class to contain the order parameter/metric information. These will be
    the 'basis' metrics.

    Each metric is a loaded file that is tau X (l - tau), as in lag times by data
    points. -1s indicate places where no data is available, but they fill space
    to make arrays square.
    """
 
    def __init__(self, file_name):
        self.type  = "metric"
        assert file_name[-3:] == '.h5'
        D = Serializer.Serializer.LoadFromHDF( file_name )
        self.name  = os.path.basename( file_name )[:-3]
        self.times = D['Taus']
        self.data  = D['Data']

    def time_slice(self, time):
        ind  = np.where( self.times == time )[0][0]
        nan_inds = np.where( self.data[ind,:] == -1 )[0]
        if len(nan_inds) > 0:
            last = np.min( nan_inds )
        else:
            last = self.data.shape[1]
        return self.data[ind,:last]

    def norm(self, norm):
        if norm != None:
            neg_inds = np.where( self.data == -1.0 )
            self.data[ neg_inds ] = 0.0
            
            if norm == 'InftyNorm': # *OBSOLETE* dividing by the mean at tau=infty
                neg_inds = np.where( self.data == -1.0 )
                self.data[ neg_inds ] = 0.0
                tau_max  = np.max( self.times )
                ind      = np.where( self.times == tau_max )
                scale_factor = np.mean( self.data[ind,:]  )
                self.data /= scale_factor

            elif norm == 'VarOne':  # set all variances to one at each tau
                scale_factor = np.sqrt( np.var( self.data, axis=1 ) )
                for i in range(self.data.shape[0]):
                    self.data[i,:] /= scale_factor[i]
                    
            self.data[ neg_inds ] = -1 # reset placeholders
            
        else:
            print "Not normalizing metric: %s" % self.name
           
        if len( np.where( np.isnan(self.data) == True )[0] ) != 0:
            print "WARNING:  NaNs detected. Attempting to proceed with zeros."
            print "LOCATION:", np.where( np.isnan(self.data) == True )
            print "NUBMER:   %d" % len( np.where( np.isnan(self.data) == True )[0] )
            print "SCALE FACTOR:", scale_factor
            self.data = np.nan_to_num(self.data)

        return scale_factor
            

    def get_name(self):
        return self.name

    def get_times(self):
        if type( self.times) == int:
            t = [self.times]
            print "Error. You need more than one lag time for the code to run right now."
            sys.exit(1)
        else:
            t = self.times
        return t

    def get_data(self):
        return self.data


class objective():
    """ a class dealing with the calculation of the objective function, a
    weighted combination of the variances of the distributions of a function
    at a given lag """

    def __init__(self):
        self.type         = "objective"
        self.times        = []            # the times at which to calculate variances
        self.weights      = []            # the weights associated w times (indexed as times)
        self.parameters   = []            # the parameter values
        self.metrics      = {}            # the basis metrics (points to metric objects)
        self.metric_names = []            # the names of the metrics, in the order they were added
        self.scale_factors= []            # the scale factors used to normalize input data
        self.Smat         = None          # the weighted combined covar matrix
        self.obj_value    = 0.0           # the objective fxn value

    # add metrics
    def add_metric(self, metric, Norm='VarOne'):
        if metric in self.metrics.keys():
            print "Error. Metric: %s already in objective class. Skipping." % metric.name()
        else:
            self.metric_names.append( metric.get_name() )
            if Norm: 
                sf = metric.norm(Norm)
            else:
                sf = np.ones( len(metric.get_times()) )
            self.scale_factors.append(sf)
            self.metrics[ metric.get_name() ] = metric
            self.parameters.append(0.0)
            self.weights.append(0.0)

    def add_metric_directory(self, path):
        fns = glob.glob( path + "/*.h5" )
        print "\n --- Data Input ---\n"
        print "Located %d metrics in directory '%s'..." % ( len(fns), path )
        for fn in fns:
            m = metric(fn)
            print "Loading: %s" % m.get_name()
            self.add_metric(m)
        print "\nSuccessfully loaded all metrics.\n"

    # functions modifying the times and weights
    def set_weights(self, weights):
        self.weights = weights

    def set_times(self, times):
        self.times = times

    def initialize_default_weights(self, type='exponential', xi=0.01):
        """ A method to get default weights and times set.
        (1) get times common to all loaded metrics
        (2) set even weights for all those metrics
        (3) set the last time's weight to zero 

        types:
        exponential - weights decrease exponentially as time increases
        uniform     - weights are even across times 

        xi is a parameter that is used differently for each scheme :("""
        
        d = [ list(self.metrics[mn].get_times()) for mn in self.metric_names ]
        self.times = list( set(d[0]).intersection(*d) )

        if len(self.times) == 0:
            print "Error. No common lag times in data! Exiting."
            sys.exit(1)
        
        if type == 'exponential':
            print "Using common times:", self.times, "with exponential weights"
            print "Exponential scaling factor (xi): %f" % xi
            self.weights = np.exp( -1 * xi * np.array(self.times) )

        elif type == 'uniform':
            print "Using common times:", self.times, "with uniform weight"
            self.weights = np.ones( len(self.times) ) / float( len(self.times) )

    def intialize(self, times, weights):
        # check that times are valid
        assert len(times) == len(weights)
        d = [ list(self.metrics[mn].get_times()) for mn in self.metric_names ]
        interesct_times = list( set(times).intersection(*d) )
        if interesct_times != times:
            print "Warning! Not all metrics have the requested times. Exiting."
            sys.exit(1)
        self.get_Smat()

    # functions to calculate covariance matrices
    def get_Smat(self):
        
        n = self.dimension()
        weight_arr = np.zeros( len(self.times) )
        S = np.zeros(( n, n ))

        # at each time, construct data_arr, which is a metrics BY data array
        for i,time in enumerate( self.times ):
            data_arr = []
            for j, name in enumerate( self.metric_names ):
                data_arr.append( self.metrics[name].time_slice( time ) )
            data_arr = np.array( data_arr )

            # calculate and add the (weighted) covariance for each time
            S +=  self.weights[i] * np.cov( data_arr )

        self.Smat = S
        return S
    
    def evaluate_objective(self, parameters):
        self.parameters = parameters
        self.obj_value = np.dot(parameters.T, np.dot(self.Smat, parameters))
        return self.obj_value

    def get_objective(self):
        return self.obj_value

    def get_alphas(self):
        alphas = self.parameters.T * np.mean( (1.0/np.array( self.scale_factors )) * self.weights.T, axis=1)
        alphas = alphas.flatten() / alphas.sum()
        return alphas

    # parameter specific functions
    def get_params(self):
        return self.values

    def set_params(self, values):
        self.values = values

    # helper functions
    def dimension(self): # this is the number of metrics
        n = len( self.parameters )
        assert len( self.metrics.keys() ) == n
        assert len( self.metric_names )   == n
        return n

    def get_names(self):
        return self.metric_names


def quadratic_optimize(objective, param_norm=None):
    """ Uses a quadratic programming solver to optimize the convex parameter
    problem. Employs the CVXOPT module.

    Does most work in place by updating an objective object

    The optimization problem takes the following form:
    1/2 * x.T * P * x + q.T * x
    G*x =< H
    A*x = b

    returns:
    opt_params: The optimal parameters (array of floats)
    new_obj:    The optimal value of the objective function (float) """

    n = objective.dimension() # the dimension of the problem
    init_obj = objective.get_objective()
    print "\n --- Quadratic Optimization --- \n"
    print "Dimensions:\t%d\n" % n

    internal_norm = True # pretty sure this is right, TJL check

    # Initialize the relevant matrices
    P = cvxopt.matrix( objective.get_Smat() )
    q = cvxopt.matrix( np.zeros(n) )
    G = cvxopt.matrix( -1 * np.eye( n ) )
    if internal_norm == True:
        h = cvxopt.matrix( np.zeros(n) )
        A = cvxopt.matrix( np.ones(n), (1,n) )
        b = cvxopt.matrix( 1.0 )
        qout = qp(P, q, G, h, A, b)
    else:
        h = cvxopt.matrix( -1 * np.ones(n) / 10.0**1.0 ) # rly we want zero, but slightly larger to force non-zero
        qout = qp(P, q, G, h)
    opt_params = np.array( qout['x'] )

    # Set the result as the new parameters
    new_obj = objective.evaluate_objective(opt_params)
    print "\nFinal objective value:\t%e" % new_obj
    alphas = objective.get_alphas() 

    return alphas, opt_params, new_obj


def plot_lag_distributions(observables, times):
    """ plots the lag distribution at various times """
    plt.figure()
    for i,time in enumerate(times):
        dist = np.dot( parameters, observables.array()[:,::time,:] )
        plt.hist(dist.flatten(), lw=2, histtype='step')
    plt.xlabel(r"$\Delta m$")
    plt.ylabel("Frequency")
    plt.show()
    return