#!/usr/bin/env python
#
#
 
__author__ = "Randall J. Radmer"
__version__ = "1.0"
__doc__ = """Do basic stats"""
 
 
import math


sqrt2pi = math.sqrt(2*math.pi)
rSqrt2pi = 1/sqrt2pi


def calcMean(xList):
    return sum(xList)/len(xList)

def calcMeanVar(xList):
    n = len(xList)
    if n==0:
        raise Exception, 'Error: Data list has zero length'
        return None

    total = 0.0
    total2 = 0.0
    for x in xList:
        total+=x
        total2+=x**2
    mean = total/float(n)
    var = total2/float(n) - mean**2
    return (mean, var)

def calcMeanSD(xList):
    mean, var = calcMeanVar(xList)
    return (mean, math.sqrt(var))

def calcDeviationList(xList):
    mean=calcMean(xList)
    return ([x-mean for x in xList], mean)

def calcMedian(xList0):
    xList=sorted(xList0)
    n = len(xList)
    if n%2==0:
       return (xList[n/2-1] + xList[n/2])/2.0
    else:
       return xList[n/2]

def calcCov(xList, yList):
    n = len(xList)
    if n==0 or len(yList)==0:
        raise Exception, 'Error: Data list has zero length'
    if n!=len(yList):
        raise Exception, 'Error: Data lists have different lengths'

    xTotal = 0.0
    yTotal = 0.0
    xyTotal = 0.0
    for i in range(n):
        xTotal += xList[i]
        yTotal += yList[i]
        xyTotal += xList[i]*yList[i]
    return xyTotal/float(n) - (xTotal/float(n))*(yTotal/float(n))

def calcCor(xList, yList):
    (meanX, varX) = calcMeanVar(xList)
    (meanY, varY) = calcMeanVar(yList)
    cov  = calcCov(xList, yList)
    cor  = cov/math.sqrt(varX*varY)
    return cor

def calcAveCor(xList, yList):
    (aveX, varX) = calcMeanVar(xList)
    (aveY, varY) = calcMeanVar(yList)
    cov  = calcCov(xList, yList)
    cor  = cov/math.sqrt(varX*varY)
    return aveX, aveY, cor

def calcCovCorMatrix(dataSets0):
    dataSets=dataSets0
    try:
        keys=sorted(dataSets)
    except AttributeError:
        dataSets={}
        for i in range(len(dataSets0)):
            dataSets[str(i)]=dataSets0[i]
        keys=sorted(dataSets)
    numDataSets = len(keys)

    covMatrix = {}
    corMatrix = {}
    for key in keys:
        covMatrix[key]={}
        corMatrix[key]={}
        mean, var = calcMeanVar(dataSets[key])
        covMatrix[key][key]=var
        corMatrix[key][key]=1.0
    for key1 in keys:
        for key2 in keys:
            if key2>key1:
                cov = calcCov(dataSets[key1], dataSets[key2])
                covMatrix[key1][key2]=cov
                covMatrix[key2][key1]=cov
                try:
                    cor=cov/math.sqrt(covMatrix[key1][key1]*covMatrix[key2][key2])
                except ZeroDivisionError:
                    cor=0.0
                corMatrix[key1][key2]=cor
                corMatrix[key2][key1]=cor

    return (covMatrix, corMatrix)


def calcLinearRegression(xList, yList):
    n = len(xList)
    if n==0 or n!=len(yList): raise Exception
    xTotal = 0.0
    yTotal = 0.0
    xxTotal = 0.0
    xyTotal = 0.0
    yyTotal = 0.0
    for x, y in zip(xList, yList):
        xTotal += x
        yTotal += y
        xxTotal += x*x
        xyTotal += x*y
        yyTotal += y*y

    nnVarX = n*xxTotal - xTotal**2
    nnVarY = n*yyTotal - yTotal**2
    nnCov  = n*xyTotal - xTotal*yTotal

    m = nnCov / float(nnVarX)
    b = (yTotal - m*xTotal) / float(n)
    r = nnCov / math.sqrt(nnVarX*nnVarY)

    nn = float(n*n)
    return (m, b, r, nnVarX/nn, nnVarY/nn, nnCov/nn)



if __name__=='__main__':
    import sys

    xList = [1, 2, 3]
    mean, sd = calcMeanVar(xList)
    sys.stdout.write('For: %s\n' % xList)
    sys.stdout.write('mean = %.2f, var = %.2f\n' % (mean, sd))

    sys.stdout.write('\n')
    xList = [1, 2, 3]
    mean, sd = calcMeanSD(xList)
    sys.stdout.write('For:%s\n' % xList)
    sys.stdout.write('mean = %.2f, sd = %.2f\n'  % (mean, sd))

    sys.stdout.write('\n')
    wList = [ 1, -1,  1, -1]
    xList = [ 2, -2,  2, -2]
    yList = [ 1, -1,  1, -1]
    zList = [ 4,  0,  0,  0]
    (covMatrix, corMatrix)=calcCovCorMatrix( (wList, xList, yList, zList) )
    keys=sorted(covMatrix)
    sys.stdout.write('For:%s\n' % wList)
    sys.stdout.write('    %s\n' % xList)
    sys.stdout.write('    %s\n' % yList)
    sys.stdout.write('    %s\n' % zList)
    sys.stdout.write('covMatrix:\n')
    for key1 in keys:
        for key2 in keys:
            sys.stdout.write('%5.2f ' % (covMatrix[key1][key2]))
        sys.stdout.write('\n')
    sys.stdout.write('corMatrix:\n')
    for key1 in keys:
        for key2 in keys:
            sys.stdout.write('%5.2f ' % (corMatrix[key1][key2]))
        sys.stdout.write('\n')

    sys.stdout.write('calcLinearRegression\n')
    sys.stdout.write('For range(10), range(10): %s\n' % (calcLinearRegression(range(10), range(10)),))
    sys.stdout.write('For range(10), range(10, 0, -1): %s\n' % (calcLinearRegression(range(10), range(10, 0, -1)),))

