<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">#!/usr/bin/env python
# encoding: utf-8
"""
fdr.py

Created by Nicholas Tatonetti on 2011-01-22.
Copyright (c) 2011 Stanford University. All rights reserved.
"""

import os
import csv
import sys
import numpy
import MySQLdb

if __name__ == '__main__':
    
    
    # query = """
    # select fd, count(*)
    # from
    # (
    #     select a.stitch_id, a.umls_id, max(gold) as fd
    #     from 
    #     (
    #         select *
    #         from pred_drug_events_%s
    #         join umls_list using (umls_id)
    #         where prr &gt; %d
    #         order by rand()
    #         limit 1000
    #     ) a
    #     join corr_drug_drug corr on (a.stitch_id = stitch_id1)
    #     left join gold_drug_ae gold on (gold.stitch_id = stitch_id2 and a.umls_id = gold.umls_id)
    #     where corr.estimate &gt; 0.05
    #     and corr.database = 'aers'
    #     group by a.stitch_id, a.umls_id
    # ) preds
    # group by fd;
    # """
    query = """
    select fd, count(*)
    from
    (
        select a.stitch_id, a.umls_id, max(gold) as fd
        from 
        (
            select *
            from pred_drug_events_%s
            join indication_drugs using (stitch_id)
            join umls_list using (umls_id)
            where prr &gt; %d
            order by rand()
            limit 1000
        ) a
        join corr_drug_ind corr using (stitch_id)
        left join 
        (
            select indication, umls_id, 1 as gold
            from indication_events 
            where frequency &gt; 0.02
        ) gold using (indication, umls_id)
        where corr.estimate &gt; 0.05
        and corr.database = 'aers'
        group by a.stitch_id, a.umls_id
    ) preds
    group by fd;
    """
    
    fdr = dict()
    
    cutoffs = [2]
    
    for name in ['e','e5', 'b','null']:
        
        fdr[name] = dict()
        
        for cutoff in cutoffs:
            print &gt;&gt; sys.stdout, name, cutoff
            vec = []
            
            for i in range(20):
                
                db = MySQLdb.connect(host="localhost", port=3307, user="root", passwd="enter_your_password",db="project_aers")
                c = db.cursor()
                c.execute(query % (name, cutoff))
                
                results = c.fetchall()
                
                if len(results) == 2:
                    if results[0][0] is None:
                        true = results[0][1]
                        false = results[1][1]
                    else:
                        true = results[1][1]
                        false = results[0][1]
                
                    vec.append(float(false)/float(false + true))
            
            if len(vec) &gt;= 1:
                fdr[name][cutoff] = (numpy.mean(vec), numpy.std(vec))
    
    
    for name in ['e','b','null']:
        
        outfh = open(os.path.expanduser('~/Dropbox/fdr_plot_%s.csv' % name),'w')
        writer = csv.writer(outfh)
        
        for cutoff in cutoffs:
            writer.writerow([cutoff, fdr[name][cutoff][0], fdr[name][cutoff][1]])
        
        outfh.close()
</pre></body></html>