"""
Computes the correlated indications for the drugs.
"""

import csv
import sys
import numpy
import MySQLdb
import rpy2.robjects as robjects
from namedmatrix import NamedMatrix

db = MySQLdb.connect(host="localhost", port=3307, user="root", passwd="enter_your_password",db="project_aers")
c = db.cursor()

# Load up the drug data.
query = """
select stitch_id, isr_report_id
from effect_aers.drugs
join effect_aers.aers2stitch using (drug_name);
"""
c.execute(query)

drug_report = dict()

for stitch_id, isr_report_id in c.fetchall():
    if stitch_id not in drug_report:
        drug_report[stitch_id] = set()
    drug_report[stitch_id].add( isr_report_id )


# Load up the indicaiton data.
query = """
select indication_descrip_term, isr_report_id
from effect_aers.indications
"""
c.execute(query)

indication_report = dict()
for indication, isr_report_id in c.fetchall():
    if indication not in indication_report:
        indication_report[indication] = set()
    indication_report[indication].add( isr_report_id )

drug_cids = sorted([k for k,v in drug_report.items() if len(v) >= 100])
indications = sorted([k for k,v in indication_report.items() if len(v) >= 100])

# all_indication_report_ids = reduce(operator.or_, indication_report.values())
# all_drug_report_ids = reduce(operator.or_, drug_report.values())
common_report_ids = reduce(operator.or_, indication_report.values()) & reduce(operator.or_, drug_report.values())

total_reports = len(common_report_ids)

data = []
keys = []
corr = dict()

for i,cid in enumerate(drug_cids):
    if i % 10 == 0:
        print >> sys.stderr, "Building data for drug %d of %d" % (i, len(drug_cids))
    
    correction = 0
    for indication in indications:
        
        x = len(drug_report[cid]&indication_report[indication])
        if x > 0:
            y = len(drug_report[cid]-indication_report[indication])
            z = len(indication_report[indication]-drug_report[cid])
            w = total_reports - len(drug_report[cid] | indication_report[indication])
            
            data.append( [x, y, z, w] )
            keys.append( [cid, indication])
            correction += 1
    
    corr[cid] = correction

# Run the correlation tests in R.

# Save the data to file.
tmp_file = tempfile.mkstemp(suffix=".csv")[-1]
tmp_fh = open(tmp_file, 'w')
writer = csv.writer(tmp_fh)
writer.writerows(data)
tmp_fh.close()

out_file = tempfile.mkstemp(suffix=".csv")[-1]
script = """
data <- read.csv('%s', header=F)
run.cor.test <- function(row) {
x = c(rep(1,row[1]), rep(1,row[2]), rep(0,row[3]), rep(0,row[4]))
y = c(rep(1,row[1]), rep(0,row[2]), rep(1,row[3]), rep(0,row[4]))
res <- cor.test(x,y)
return(c( res$p.value, res$estimate ))
}
result <- t(apply(data, 1, run.cor.test))
write.csv(result, file='%s')
""" % (tmp_file, out_file)

robjects.r(script)
reader = csv.reader(open(out_file))
reader.next()
correlations = [map(float,row[1:]) for row in reader]

for (cid,ind),(pval, estimate) in zip(keys,correlations):
    q = "INSERT INTO corr_drug_ind_random (stitch_id, indication, pvalue, corrected, estimate) VALUES ('%s',\"%s\",%.10e,%.10e,%.10f);" % (cid, ind, pval, min(1,float(pval)*corr[cid]), estimate)
    c.execute(q)