<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">"""
Create highly binned plot for Prob(drug-pair, event) in MedEffect vs PRR / P-Value
"""

import os
import sys
import MySQLdb

db = MySQLdb.connect(host="localhost", port=3306, user="root", passwd="enter_your_password",db="project_aers")
c = db.cursor()

query = """
select stitch_id1, stitch_id2, umls_id, observed, pvalue, -log10(greatest(1e-323,pvalue))
from offsides_poly
where drug1_prr is null
and drug2_prr is null
"""
c.execute(query)
data = c.fetchall()

estimates = sorted([x[3] for x in data])
num_bins = 50
bin_length = len(estimates)/num_bins
bins = [(estimates[i*bin_length], estimates[(i+1)*bin_length]) for i in range(num_bins)]
bins[-1] = (bins[-1][0], max(estimates)+0.01)


query = """
select stitch_id1, stitch_id2, umls_id
from effect_medeffect.pair_event_report_count
"""
c.execute(query)
gold_data = c.fetchall()

gold_dict = dict()
for sid1, sid2, uid in gold_data:
    key = "%s,%s" % (sid1, sid2)
    if not key in gold_dict:
        gold_dict[key] = set()
    gold_dict[key].add(uid)

binned = dict()
for bin in bins:
    binned[bin] = list()

for sid1, sid2, uid, prr, pvalue, score in data:
    key = "%s,%s" % (sid1, sid2)
    bin = [x for x in bins if x[0] &lt;= prr &lt;= x[1]][0]
    binned[bin].append(uid in gold_dict.get(key, set()))

for bin in bins:
    values = binned[bin]
    print &gt;&gt; sys.stdout, (bin[0]+bin[1])/2.0, numpy.mean(values), 1.96*numpy.std(values)/math.sqrt(len(values))
    </pre></body></html>