<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">"""
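Computes the phi correlation between each drug and each indication, based on
the reports in which they occur together.

Usage: python correlated_indications_nosql.py START COUNT

Processes COUNT drugs beginning at index START of the sorted drug list and
writes the results to results/START_STOP.csv, where STOP = START + COUNT.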


CLUSTER
=======

for ((i=0; i&lt;810; i+=50)); do bsub -o output/$i.out python correlated_indications_nosql.py $i 50; done

"""

################################################
#
# BE AWARE: This is currently set up for EMR
#
################################################

import csv
import sys
import math
import operator

start = int(sys.argv[1])
stop = start + int(sys.argv[2])

MIN_REPORTS = 50

drug_report = dict()
drugreps = set()
for report_id, stitch_id in csv.reader(open('raw_mapped_drugs.txt'), delimiter='\t'):
    if stitch_id not in drug_report:
        drug_report[stitch_id] = set()
    drug_report[stitch_id].add( report_id )
    drugreps.add( report_id )

indication_report = dict()
indreps = set()
for indication, report_id in csv.reader(open('raw_mapped_indications.txt'), delimiter='\t'):
    if indication not in indication_report:
        indication_report[indication] = set()
    indication_report[indication].add( report_id )
    indreps.add( report_id )

drug_cids = sorted([k for k,v in drug_report.items() if len(v) &gt;= MIN_REPORTS])
indications = sorted([k for k,v in indication_report.items() if len(v) &gt;= MIN_REPORTS])

total_reports = len(indreps &amp; drugreps)

resultfh = open('results/%d_%d.csv' % (start, stop), 'w')
writer = csv.writer(resultfh)

for i in range(start, stop):
    cid = drug_cids[i]
    
    print &gt;&gt; sys.stderr, "Building data for drug %d of %d" % (i, len(drug_cids))
    
    for indication in indications:
        
        n11 = len(drug_report[cid]&amp;indication_report[indication])
        if n11 &gt; 0:
            n10 = len(drug_report[cid]-indication_report[indication])
            n01 = len(indication_report[indication]-drug_report[cid])
            n00 = total_reports - len(drug_report[cid] | indication_report[indication])
            
            phi = (n11*n00 - n10*n01)/math.sqrt((n10+n11)*(n01+n00)*(n10+n00)*(n11+n01))
            
            writer.writerow([cid, indication, phi, n11, n10, n01, n00])

resultfh.close()
</pre></body></html>