"""
Script to build data sets needed for Circos to draw pretty DDI plots.

"""

import os
import sys
import csv
import MySQLdb

c = MySQLdb.connect(host="localhost", port=3307, user="root", passwd="enter_your_password",db="project_aers").cursor()


# Note that we are working off of the interactions derived from pairs of drugs
# from the top 100 most prescribed drugs lists. So rather than ~700 drugs around
# the circos circle, there will be only ~100.

# C1145628  Autonomic Nervous Disorders
# C0007222  Cardiovascular Disorders
# C0014136  Endocrine Disorders
# C0017178  Gastrointestinal Disorders
# C0080276  Genitourinary Disorders
# C0017411  Gynecologic Disorders
# C0019080  Hemorrhage
# C0549600  Maternal-Fetal Disorders
# C0028715  Metabolic and Nutritional Disorders
# C0025517  Metabolic Disorders
# C0026860  Musculo-skeletal System
# C0027763  Nervous System
# C0015397  Ophthalmic Disorders
# C0030660  Pathological Disorders
# C0024115  Pulmonary Disorders
# C0022658  Renal Disorders
# C0035237  Respiratory System
# C0549602  Reticuloendthelial Disorders
# C0037274  Skin and Appendages

BLOCK_SIZE = 100
cui_list = ['C1145628','C0007222','C0014136','C0017178','C0080276','C0017411','C0019080','C0549600','C0028715','C0025517','C0026860','C0027763','C0015397','C0030660','C0024115','C0022658','C0035237','C0549602','C0037274']
cui_list.remove('C0030660')

blues = ['blue1','blue2','blue3','blue4','blue5']
reds = ['red1','red2','red3','red4','red5']

for cui in cui_list:
    
    print >> sys.stderr, "Running for cui: %s" % cui
    
    #################################
    #
    # Build "data/segments_CATEGORY.txt"
    #
    #################################
    
    # query = """
    # select distinct atcid
    # from compound_stitch.atc2stitch_uniq
    # join
    # (
    #   select distinct stitch_id1 stitch_id
    #   from project_aers.pred_pair_events_p_e5
    #   union
    #   select distinct stitch_id2 stitch_id
    #   from project_aers.pred_pair_events_p_e5
    # ) a using (stitch_id);
    # """
    query = """
    select distinct atcid
    from compound_stitch.atc2stitch_uniq
    """
    c.execute(query)
    
    atcids = set([x[0] for x in c.fetchall()])
    
    # The "Chromosomes"
    chromomsomes = sorted(set([x[0] for x in atcids]))
    segment_fh = open('/tmp/segments_%s.txt' % cui,'w')
    for level1 in chromomsomes:
        num = len(set([x for x in atcids if x[0] == level1]))*BLOCK_SIZE
        print >> segment_fh, "chr - %(l1)s %(l1)s 0 %(N)d" % {'l1':level1, 'N':num}
    
    
    query = """
    select first5, -log(abs(coef),1.5)*sign(coef)
    from eval_first5_heat_anova_drugmean
    join effect_sider.costart_top on (event_class = `desc`)
    where cui = '%s'
    and pvalue < 0.05
    """ % (cui)
    c.execute(query)
    
    level42coef = dict()
    for first5, coef in c.fetchall():
        level42coef[first5] = coef
    
    min_coef = min(level42coef.values())
    max_coef = max(level42coef.values())
    
    # The "Bands"
    for level1 in chromomsomes:
        
        bands = sorted(set([x[:5] for x in atcids if x[0] == level1]))
        start = 0
        for i,level4 in enumerate(bands):
            num = len(set([x for x in atcids if x[:5] == level4]))
            end = start + num*BLOCK_SIZE
            
            if level4 in level42coef:
                if level42coef[level4] > 0:
                    color = reds[int(len(reds) * (level42coef[level4])/(max_coef))-1]
                else:
                    color = blues[int(len(reds) * (abs(level42coef[level4]))/(abs(min_coef)))-1]
            else:
                color = 'yell'
            
            arguments = {
                'l1': level1,
                'l4': level4,
                'start': start,
                'end': end,
                'color': color
            }
            print >> segment_fh, "band %(l1)s %(l4)s %(l4)s %(start)d %(end)s %(color)s" % arguments
            start = end
    
    segment_fh.close()
    
    
    #################################
    #
    # Build "data/links_CATEGORY.txt" and "data/labels_CATEOGRY.txt"
    #
    #################################
    
    query = """
    select atcid, drug_name
    from compound_stitch.atc
    group by atcid
    """
    c.execute(query)
    
    atc2name = dict()
    for atcid, name in c.fetchall():
        if len(name) > 17 and not name.find(' ') == -1:
            name = name.split()[0]
        
        if len(name) > 17:
            name = name[:17]
        
        atc2name[atcid] = name
    
    
    query = """
    select dclass1, dclass2
    from
    (
    	select *
    	from project_aers.eval_emr_pair_class_ancova_full
    	where combo_n >= 20
    	and p_d1mc < 0.05 and p_d2mc < 0.05
    	and p_ancova_corr < 0.05
    	and (sign(combo_delta_mu-d1_delta_mu) = sign(combo_delta_mu-d2_delta_mu))
    ) a
    where cui = '%(cui)s'
    group by dclass1, dclass2
    """ % {'cui': cui}
    c.execute(query)
    
    clinically_validated = set()
    for dclass1, dclass2 in c.fetchall():
        clinically_validated.add('%s,%s' % (dclass1, dclass2))
        clinically_validated.add('%s,%s' % (dclass1, dclass2))
    
    query = """
    select cui, round(1/5*count(*)) as min_reports
    from effect_sider.costart_top
    join effect_sider.costart_categories using (cui)
    where cui = '%s'
    group by cui
    """ % cui
    c.execute(query)
    min_reports = [x[1] for x in c.fetchall()][0]
    
    query = """
    select a.atcid, b.atcid, max(p_interaction is not null) sigclass, count(t_statistic > 10) numsig
    from project_aers.pred_pair_events_p_e5_novel
    join compound_stitch.atc2stitch_uniq a on (a.stitch_id = stitch_id1)
    join compound_stitch.atc2stitch_uniq b on (b.stitch_id = stitch_id2)
    join effect_sider.costart_categories c using (umls_id)
    left join
    (
        select dclass1, dclass2, p_interaction
        from project_aers.eval_pair_eclass_drugmean
        where p_interaction < 0.05/100000
        and cui = '%(cui)s'
    ) sig on ((a.first5 = dclass1 and b.first5 = dclass2) or (b.first5 = dclass1 and a.first5 = dclass2))
    where cui = '%(cui)s'
    group by a.atcid, b.atcid
    having (sigclass > 0 or numsig > %(min_reports)d);
    """ % {'cui':cui, 'min_reports':min_reports}
    c.execute(query)
    
    background_links = []
    foreground_links = []
    validated_links = []
    labels = set()
    
    link_counter = 0
    valid_counter = 0
    drawn_links = set()
    for atcid1, atcid2, significant, numbtwn in c.fetchall():
        
        drawn_key = '%s-%s' % tuple(sorted([atcid1, atcid2]))
        if drawn_key in drawn_links:
            continue
        
        drawn_links.add(drawn_key)
        
        pos1 = sorted([x for x in atcids if x[0] == atcid1[0]]).index(atcid1)
        pos2 = sorted([x for x in atcids if x[0] == atcid2[0]]).index(atcid2)
        dclass1 = atcid1[:5]
        dclass2 = atcid2[:5]
        validated = '%s,%s' % (dclass1, dclass2) in clinically_validated or '%s,%s' % (dclass2, dclass1) in clinically_validated
        color = 'drugdrug' if significant == 0 else 'classclass' if not validated else 'validated'
        if validated:
            valid_counter += 1
        
        arguments1 = {
            'counter': link_counter,
            'l1': atcid1[0],
            'pos': int(pos1*BLOCK_SIZE + 0.5*BLOCK_SIZE),
            'color': color,
            'name': atc2name.get(atcid1, atcid1).lower().replace(' ','_')
        }
        arguments2 = {
            'counter': link_counter,
            'l1': atcid2[0],
            'pos': int(pos2*BLOCK_SIZE + 0.5*BLOCK_SIZE),
            'color': color,
            'name': atc2name.get(atcid2, atcid2).lower().replace(' ','_')
        }
        if validated:
            validated_links.append("link_%(counter)d %(l1)s %(pos)s %(pos)s color=%(color)s" % arguments1)
            validated_links.append("link_%(counter)d %(l1)s %(pos)s %(pos)s color=%(color)s" % arguments2)
            labels.add("%(l1)s %(pos)s %(pos)s %(name)s" % arguments1)
            labels.add("%(l1)s %(pos)s %(pos)s %(name)s" % arguments2)
        elif significant == 1:
            foreground_links.append("link_%(counter)d %(l1)s %(pos)s %(pos)s color=%(color)s" % arguments1)
            foreground_links.append("link_%(counter)d %(l1)s %(pos)s %(pos)s color=%(color)s" % arguments2)
            labels.add("%(l1)s %(pos)s %(pos)s %(name)s" % arguments1)
            labels.add("%(l1)s %(pos)s %(pos)s %(name)s" % arguments2)
        else:
            background_links.append("link_%(counter)d %(l1)s %(pos)s %(pos)s color=%(color)s" % arguments1)
            background_links.append("link_%(counter)d %(l1)s %(pos)s %(pos)s color=%(color)s" % arguments2)
        
        link_counter += 1
    
    print >> sys.stderr, "Found %d background links." % len(background_links)
    print >> sys.stderr, "Found %d foreground links." % len(foreground_links)
    print >> sys.stderr, "Found %d known ddi links." % valid_counter
    
    link_fh = open('/tmp/links_%s.txt' % cui, 'w')
    for link in background_links:
        print >> link_fh, link
    
    link_fh.close()
    
    link_fh = open('/tmp/links_%s_classclass.txt' % cui, 'w')
    for link in foreground_links:
        print >> link_fh, link
    
    link_fh.close()
    
    link_fh = open('/tmp/links_%s_validated.txt' % cui, 'w')
    for link in validated_links:
        print >> link_fh, link
    
    link_fh.close()
    
    label_fh = open('/tmp/labels_%s.txt' % cui, 'w')
    for label in labels:
        print >> label_fh, label
    
    label_fh.close()
    
    
    #################################
    #
    # Build "data/heat_CATEGORY_EVENT.txt"
    #
    #################################
    
    # The heatmap gives the individual drug associations to the "top" adverse events
    #  in the adverse event category. Top is defined to be the top 10 most populous
    # adverse events (most associated according to individual aers analysis).
    
    query = """
    select atcid, umls_id, t_statistic
    from pred_drug_events_e5
    join compound_stitch.atc2stitch_uniq using (stitch_id)
    join
    (
    	# Get the top adverse events for this category
    	select umls_id, event, count(distinct stitch_id) cnt
    	from effect_sider.costart_categories
    	join offsides using (umls_id)
    	join top_prescriptions using (stitch_id)
    	where cui = '%(cui)s'
    	group by umls_id
    	order by cnt desc
    	limit 10
    ) top using (umls_id)
    where t_statistic > 5
    """ % {'cui':cui}
    c.execute(query)
    
    heat_data = c.fetchall()
    
    events = sorted(set([x[1] for x in heat_data]))
    
    files = [open('/tmp/heat_%s_AE%d.txt' % (cui, i), 'w') for i,event in enumerate(events)]
    
    for atcid, event, value in heat_data:
        if atcid in atcids:
            pos = sorted([x for x in atcids if x[0] == atcid[0]]).index(atcid)
            arguments = {
                'l1': atcid[0],
                'pos': pos*BLOCK_SIZE,
                'end': pos*BLOCK_SIZE + BLOCK_SIZE,
                'value': value
            }
            print >> files[events.index(event)], "%(l1)s %(pos)s %(end)s %(value)s" % arguments
    
    for fh in files:
        fh.close()