"""

Description:
This script was written to extract desired data quickly from a user-selected set of MULTIS trials. A report will be
generated to show the average and standard deviation of all the collected data; the user may choose to save the
generated report if desired. The numbers for each selected subject are also written to a CSV file.

Relevant variables include:
    age
    gender
    ethnicity
    height
    weight
    BMI - body mass index
    activity level

    Separated by segment (UA - Upper Arm, LA - Lower Arm, UL - Upper Leg, LL - Lower Leg):
    l - Segment length
    dc - Distal circumference
    cc - Central circumference
    pc - Proximal circumference


Getting started:
    Simply run the script and the MULTIS trials with available subject xml files will populate a list where you can
    check the ones you would like to include in the report. You may also hit the 'Select All' button to automatically
    select all the trials. Hit 'Okay' and the report will be generated for you to view. You may choose to save the
    report by clicking on the save button within the matplotlib figure.
    See data file contents on project Wiki for description of each category number (page 7-8)
    https://simtk.org/plugins/moinmoin/multis/Infrastructure/InstrumentedUltrasound?action=AttachFile&do=view&target
    =2015CB-047-001.B+Data+File+Contents_MULTIS.pdf

    Original Author:
        Erica Morrill
        Department of Biomedical Engineering
        Lerner Research Institute
        Cleveland Clinic
        Cleveland, OH
        morrile2@ccf.org

"""

import xml.etree.ElementTree as ET
import os
import Tkinter as tk
import pandas
import tkMessageBox
import tkFileDialog
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import numpy as np

class FileSelectionApp(tk.Tk):
    """Checklist window for picking MULTIS subject XML files to summarise.

    On construction the MULTIS directory is located (or requested from the
    user), every matching subject XML file below it is listed with a
    checkbox, and two buttons are added: 'Select All' and 'Okay'.  Pressing
    'Okay' parses the ticked files, writes the collected table to CSV and
    shows one histogram per column.

    Attributes set by this class:
        directory: path of the MULTIS directory being scanned.
        subFiles:  tuple of subject XML file names, sorted.
        dir:       tuple of the folders holding each entry of ``subFiles``.
        var:       list of ``tk.IntVar``, one per checkbox (1 == ticked).
        i:         number of checkboxes created.
        df:        pandas DataFrame with one row per parsed subject.
        avg:       per-column mean of ``df`` (set by ``checkBoxes``).
        stdDev:    per-column standard deviation of ``df`` (set by ``checkBoxes``).
    """

    # Column order of the report table.
    COLUMNS = (
        'Age (yrs)', 'Gender', 'Ethnicity', 'Race',
        'Height (cm)', 'Mass (kg)', 'BMI', 'Activity',
        'UA_l (cm)', 'UA_dc (cm)', 'UA_cc (cm)', 'UA_pc (cm)',
        'LA_l (cm)', 'LA_dc (cm)', 'LA_cc (cm)', 'LA_pc (cm)',
        'UL_l (cm)', 'UL_dc (cm)', 'UL_cc (cm)', 'UL_pc (cm)',
        'LL_l (cm)', 'LL_dc (cm)', 'LL_cc (cm)', 'LL_pc (cm)',
    )

    # Integer-coded columns; these get fixed-bin histograms with percentages.
    CATEGORICAL_COLUMNS = ('Gender', 'Ethnicity', 'Race', 'Activity')

    # XML tag of each anatomical segment -> column-name prefix.
    SEGMENT_PREFIXES = {
        'Upper_Arm': 'UA',
        'Lower_Arm': 'LA',
        'Upper_Leg': 'UL',
        'Lower_Leg': 'LL',
    }

    # Text of a cluster's first child -> column-name suffix.
    MEASUREMENT_SUFFIXES = {
        'Length': 'l',
        'Distal Circumference': 'dc',
        'Central Circumference': 'cc',
        'Proximal Circumference': 'pc',
    }

    def __init__(self):
        """Locate the data directory, list the subject files and build the UI."""
        tk.Tk.__init__(self)

        multis_dir = self._locate_multis_dir()
        if multis_dir is None:
            # Nothing found under the home directory: let the user point to it.
            # A cancelled dialog yields an empty value; normalise it to '' so
            # the directory walk simply finds nothing.
            multis_dir = tkFileDialog.askdirectory(title='Locate MULTIS_test directory') or ''

        self.directory = multis_dir
        self.getSubjects()
        self.title('Select Files')
        self.df = pandas.DataFrame(data=None, columns=list(self.COLUMNS))

        self.columnconfigure(0, weight=1)

        self.var = []
        self.i = 0
        for item in self.subFiles:
            state = tk.IntVar()
            self.var.append(state)
            # item[0:-4] drops the '.xml' extension from the label.
            box = tk.Checkbutton(self, text=item[0:-4], variable=state)
            box.grid(column=0, row=self.i, sticky='w')
            self.i += 1

        # Integer division keeps the Python 2 result; max() avoids dividing by
        # zero when no subject files were found.
        self.minsize(200, 100 // max(self.i, 1))
        tk.Button(self, text="Okay", command=self.checkBoxes).grid(row=1, column=1, sticky='ens')
        tk.Button(self, text="Select All", command=self.SelectAll).grid(row=0, column=1, sticky='ens')

    def _locate_multis_dir(self):
        """Return the last 'MULTIS_test' folder found under the home directory, or None."""
        found = None
        for dirname, subdirList, fileList in os.walk(os.path.expanduser('~')):
            for sub in subdirList:
                if "MULTIS_test" in sub and "studies" not in dirname:
                    # The walk is not stopped here, so a later match replaces
                    # an earlier one.
                    found = os.path.join(dirname, sub)
                    print(found)
        return found

    def yview(self, *args):
        """Forward ``args`` to ``self.yview``.

        NOTE(review): this method calls itself unconditionally, so any call
        recurses until the interpreter's recursion limit is hit.  Nothing in
        this class calls it; it looks like a leftover from scrollbar wiring
        and probably should forward to a scrollable child widget - confirm
        the intent before relying on it.
        """
        self.yview(*args)

    def checkBoxes(self):
        """Parse every ticked subject, write the CSV and show the report.

        If nothing is ticked a warning is shown and the window stays open.
        """
        selected = [idx for idx, flag in enumerate(self.var) if flag.get() == 1]
        if not selected:
            tkMessageBox.showwarning("No trials selected",
                                     "Select at least one trial before generating the report.")
            return

        # Start from an empty table so a repeated click (e.g. after a failed
        # parse) cannot leave duplicated rows behind.
        self.df = pandas.DataFrame(data=None, columns=list(self.COLUMNS))
        for idx in selected:
            self.saveData(idx)

        # Statistics are taken on a float view so they do not depend on the
        # dtypes pandas happened to infer while rows were concatenated.
        numeric = self.df.astype(float)
        self.avg = numeric.mean()
        self.stdDev = numeric.std()

        categorical = list(self.CATEGORICAL_COLUMNS)
        self.df[categorical] = self.df[categorical].astype(int)
        print(self.df.dtypes)
        print('Results')
        print(self.avg)
        print(self.stdDev)

        out_dir = self.directory + '/ThickSumFigsNew'
        if not os.path.isdir(out_dir):
            # to_csv does not create missing folders.
            os.makedirs(out_dir)
        self.df.to_csv(out_dir + '/' + '001_Demographics_anthropometrics.csv')

        self.destroy()
        self.plot_data()
        self.quit()

    def saveData(self, count):
        """Parse subject file number ``count`` and append one row to ``self.df``.

        Args:
            count: index into ``self.subFiles`` / ``self.dir``.

        Values that are absent from the file (a missing segment or
        measurement, or a BMI that cannot be computed) are stored as NaN.
        """
        xml_name = self.dir[count] + '/' + self.subFiles[count]
        root = ET.parse(xml_name).getroot()

        subjData = root.find("Subject_Data")
        anatomical = subjData.find("Anatomical_Measurements")
        demographics = subjData.find("Demographics")
        height_mass = subjData.find("Height_and_Mass")
        activity_level = subjData.find("Activity_Level")

        row = dict.fromkeys(self.COLUMNS, np.nan)
        row['Age (yrs)'] = int(demographics.find("Age").text)
        row['Gender'] = int(demographics.find("Gender").text)
        row['Ethnicity'] = int(demographics.find("Ethnicity").text)
        row['Race'] = int(demographics.find("Race").text)

        height_node = height_mass.find("Height")
        mass_node = height_mass.find("Mass")
        height = float(height_node.find("Magnitude").text)
        mass = float(mass_node.find("Magnitude").text)
        row['Height (cm)'] = height
        row['Mass (kg)'] = mass

        # Find specific subjects given a certain criteria
        if row['Gender'] == 0:
            print(self.subFiles[count])

        # Unit codes '31' / '20' are presumably cm and kg (the formula below
        # divides height by 100) - see the data file contents document.
        if height_node.find("Units").text == '31' and mass_node.find("Units").text == '20':
            row['BMI'] = mass / ((height / 100) ** 2)
        else:
            # BMI stays NaN so the row can still be recorded.
            tkMessageBox.showerror("Error", "BMI could not be calculated because either height or mass had the wrong units")

        row['Activity'] = int(activity_level.find("Lifestyle").text)

        for segment in anatomical:
            prefix = self.SEGMENT_PREFIXES.get(segment.tag)
            if prefix is None or segment.get('type') != 'Cluster':
                continue
            for cluster in segment.findall("Cluster"):
                if len(cluster) < 2:
                    continue
                # First child names the measurement, second child holds its value.
                suffix = self.MEASUREMENT_SUFFIXES.get(cluster[0].text)
                if suffix is not None:
                    row['%s_%s (cm)' % (prefix, suffix)] = float(cluster[1].text)

        new_row = pandas.DataFrame([[row[name] for name in self.COLUMNS]], columns=list(self.COLUMNS))
        self.df = pandas.concat([self.df, new_row], ignore_index=True)

    def plot_data(self):
        """Draw one histogram per column of ``self.df`` on a 6 x 4 grid and show it."""
        fig, axes = plt.subplots(6, 4, figsize=(12, 12))
        for ax, name in zip(axes.flat, self.df.columns):
            if name in self.CATEGORICAL_COLUMNS:
                self._plot_categorical(ax, name)
            else:
                self._plot_continuous(ax, name)

        plt.tight_layout()
        plt.show()

    def _plot_categorical(self, ax, name):
        """Histogram an integer-coded column with one bin per code and a percentage label."""
        # Activity and Race use codes 0-4; the other coded columns use 0-1.
        n_codes = 5 if name in ('Activity', 'Race') else 2
        ticks = list(range(n_codes))
        bins = [edge - 0.5 for edge in range(n_codes + 1)]

        counts, edges, _ = ax.hist(self.df[name].astype(int), bins=bins, align='mid')
        ax.set_title(name)
        ax.set_xticks(ticks)
        low, high = ax.get_ylim()
        # Extra head-room so the percentage labels fit above the bars.
        ax.set_ylim([low, high + np.max(counts) / 3.0])
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
        ax.locator_params(axis='y', nbins=3)

        total = counts.sum()
        for count, left_edge in zip(counts, edges):
            share = 100.0 * float(count) / total if total else 0.0
            ax.annotate('%0.0f%%' % share, xy=(left_edge + 0.5, count), va='bottom', ha='center')

    def _plot_continuous(self, ax, name):
        """Histogram a measured column and put its mean +/- std in the title."""
        # NaN marks values missing from a subject file; hist cannot bin them.
        values = self.df[name].astype(float).dropna()
        ax.hist(values)
        ax.set_title(name + '\nAvg = %.2f +/- %.2f' % (self.avg[name], self.stdDev[name]))
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
        ax.locator_params(axis='y', nbins=3)
        ax.xaxis.set_major_locator(MaxNLocator(5))

    def SelectAll(self):
        """Tick every checkbox."""
        for flag in self.var:
            flag.set(1)

    def getSubjects(self):
        """Collect subject XML files below ``self.directory``.

        A file qualifies when its name is 15 characters long, starts with
        'MULTIS' and ends with '.xml' (case-insensitive).  Results are stored
        as two parallel tuples sorted by file name: ``self.subFiles`` (names)
        and ``self.dir`` (containing folders).  Both are empty tuples when
        nothing matches.
        """
        found = []
        for dirname, subdirList, fileList in os.walk(self.directory):
            for filename in fileList:
                if (len(filename) == 15 and filename.startswith("MULTIS")
                        and filename.lower().endswith(".xml")):
                    found.append((filename, dirname))
        found.sort()
        self.subFiles = tuple(name for name, _ in found)
        self.dir = tuple(folder for _, folder in found)


if __name__ == "__main__":
    # Script entry point: build the selection window and run Tk's event loop
    # until the window is closed or the report has been produced.
    FileSelectionApp().mainloop()