<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">"""
Discover adverse events significantly associated with drugs.
"""

import R
import os
import csv
import sys
import random
import MySQLdb
import tempfile
import operator
from namedmatrix import NamedMatrix

db = MySQLdb.connect(host="localhost", port=3307, user="root", passwd="enter_your_password",db="project_aers")
c = db.cursor()

# First I want to count the number of reports for each possible pair of drug (!)
# This is (hopefully) allow me to restrict my analysis to drug pairs with lots of reports.
query = """
select report_id, stitch_id
from drug_report_event;
"""
c.execute(query)

report2stitch = dict()
for row in c.fetchall():
    if not report2stitch.has_key(row[0]):
        report2stitch[row[0]] = set()
    
    report2stitch[row[0]].add(row[1])


pair2count = dict()
for rid, drugs in report2stitch.items():
    
    for d1 in drugs:
        for d2 in drugs:
            if not d1 == d2:
                pair = None
                if d1 &lt; d2:
                    pair = "%s:%s" % (d1, d2)
                else:
                    pair = "%s:%s" % (d2, d1)
                
                if pair is None:
                    raise Exception("pair is None, this should never happen.")
                
                if not pair2count.has_key(pair):
                    pair2count[pair] = 0
                pair2count[pair] += 1


outfh = open('part4_pair_report_count.txt','w')

for pair, count in pair2count.items():
    d1 = pair.split(':')[0]
    d2 = pair.split(':')[1]
    
    print &gt;&gt; outfh, "%s\t%s\t%d" % (d1, d2, count)
    outfh.flush()

outfh.close()

#### ------------------------------------------------------------------------
# The above code counted how many reports we have for each pair,
# which I then stored in pair_report_count, and we can use from there.
#### ------------------------------------------------------------------------

query = """
select stitch_id1, stitch_id2
from project_aers.pair_report_count
where report_count &gt;= 10;
"""
c.execute(query)
pairs = [row[:2] for row in c.fetchall()]

</pre></body></html>