<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">"""
Computes the correlated indications for the drugs.
"""

################################################
#
# BE AWARE: This is currently set up for EMR
#
################################################


import csv
import sys
import math
import MySQLdb
import operator

print &gt;&gt; sys.stderr, "Running correlated_indications.py version 0.1"

c = MySQLdb.connect(host="localhost", port=3307, user="root", passwd="enter_your_password",db="project_aers").cursor()

MIN_REPORTS = 10

# Load up the drug data.

query = """
select stitch_id, report_id
from drug_report
"""

# query = """
# select stitch_id, report_id
# from effect_stanford.mapped_drugs
# """
c.execute(query)

drug_report = dict()

for stitch_id, isr_report_id in c.fetchall():
    if stitch_id not in drug_report:
        drug_report[stitch_id] = set()
    drug_report[stitch_id].add( isr_report_id )

#Load up the indicaiton data.
query = """
select indication, report_id
from indication_report
"""

# query = """
# select icd9, report_id
# from effect_stanford.mapped_indications
# """
c.execute(query)

indication_report = dict()
for indication, isr_report_id in c.fetchall():
    if indication not in indication_report:
        indication_report[indication] = set()
    indication_report[indication].add( isr_report_id )

drug_cids = sorted([k for k,v in drug_report.items() if len(v) &gt;= MIN_REPORTS])
indications = sorted([k for k,v in indication_report.items() if len(v) &gt;= MIN_REPORTS])

# all_indication_report_ids = reduce(operator.or_, indication_report.values())
# all_drug_report_ids = reduce(operator.or_, drug_report.values())
common_report_ids = reduce(operator.or_, indication_report.values()) &amp; reduce(operator.or_, drug_report.values())

total_reports = len(common_report_ids)

for i,cid in enumerate(drug_cids):
    if i % 10 == 0:
        print &gt;&gt; sys.stderr, "Building data for drug %d of %d" % (i, len(drug_cids))
    
    for indication in indications:
        
        n11 = len(drug_report[cid]&amp;indication_report[indication])
        if n11 &gt; 0:
            n10 = len(drug_report[cid]-indication_report[indication])
            n01 = len(indication_report[indication]-drug_report[cid])
            n00 = total_reports - len(drug_report[cid] | indication_report[indication])
            
            phi = (n11*n00 - n10*n01)/math.sqrt((n10+n11)*(n01+n00)*(n10+n00)*(n11+n01))
            
            q = "INSERT INTO corr_drug_ind VALUES ('%s',\"%s\",%.10f,%d,%d,%d,%d);" % (cid, indication, phi, n11, n10, n01, n00)
            r = c.execute(q)
            
</pre></body></html>