"""
timeline_graph.py

Script to generate a data file we can use to plot the timeline data in Plot.

"""

import os
import csv
import sys
import math
import numpy
import random
import MySQLdb
import operator
import rpy2.robjects as robjects

def significance_query(writer, stitch_id, umls_id):
    query = """
    select year, -log(pvalue), year_mu, year_sd
    from timeline
    join
    (
    	select year, avg(-log(pvalue)) as year_mu, std(-log(pvalue)) as year_sd, count(*) as N
    	from timeline
    	where stitch_id = '%s'
    	group by year
    ) stats using (year)
    where stitch_id = '%s'
    and umls_id = '%s'
    and year < 2005
    group by stitch_id, umls_id, year
    order by year;
    """ % (stitch_id, stitch_id, umls_id)
    if c.execute(query) == 0:
        raise Exception("No results found for stitch_id %s" % stitch_id)
    
    data = c.fetchall()
    for year, score, mu, sd in data:
        writer.writerow([year, score, mu, mu + sd, max(0, mu - sd)])

def reporting_query(writer, stitch_id, umls_id):
    query = """
    select year, fg_mu, fg_sd, bg_mu, bg_sd
    from timeline
    where stitch_id = '%s'
    and umls_id = '%s'
    and year < 2005
    group by stitch_id, umls_id, year
    order by year;
    """ % (stitch_id, umls_id)
    if c.execute(query) == 0:
        raise Exception("No results found for stitch_id %s" % stitch_id)
    
    data = c.fetchall()
    for year, fg_mu, fg_sd, mu, sd in data:
        writer.writerow([year, fg_mu, fg_mu + fg_sd, max(0, fg_mu - fg_sd), mu, mu + sd, max(0, mu - sd)])
    

SHARED = os.path.expanduser('~/Dropbox/DocDump/')

db = MySQLdb.connect(host="localhost", port=3307, user="root", passwd="enter_your_password",db="project_aers")
c = db.cursor()

# stitch_id = 'CID000005090' # vioxx
# stitch_id = 'CID000001302' # naproxen
stitch_id = 'CID000002662' # celebrex
# stitch_id = 'CID000003672' # ibuprofen
# umls_id = 'C0027051' # myocardial infarction
# umls_id = 'C0008031' # chest pain
umls_id = 'C0013395' # upset stomach

# stitch_id = 'CID000077999' # rosiglitazone / avandia
# stitch_id = 'CID000004829' # pioglitazone/actos, in same class as avandia
# stitch_id = 'CID000004091' # metformin
# umls_id = 'C0023895' # liver disorder

# stitch_id = 'CID000004723' # pemoline
# umls_id = 'C0160390' # liver injury

# Make a plot of the -log of the pvalues.
file_name = os.path.join(SHARED, "%s_%s_timeline_plot_data_significance.csv" % (stitch_id, umls_id))
outfh = open(file_name, 'w')
writer = csv.writer(outfh)
significance_query(writer, stitch_id, umls_id)
outfh.close()

# Make a plot of the reporting frequencies against the background.
file_name = os.path.join(SHARED, "%s_%s_timeline_plot_data_frequency.csv" % (stitch_id, umls_id))
outfh = open(file_name, 'w')
writer = csv.writer(outfh)
reporting_query(writer, stitch_id, umls_id)
outfh.close()