<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">#!/usr/bin/env python
# encoding: utf-8
"""
estimate_backgrounds.py

Created by Nicholas Tatonetti on 2011-01-22.
Copyright (c) 2011 Stanford University. All rights reserved.
"""

import os
import sys
import numpy
import random
import MySQLdb

# Sampling parameters for the background estimate.
SAMPLE_FRACTION = 0.1    # each draw uses this fraction of an event's report count...
MAX_SAMPLE_SIZE = 10000  # ...capped at this many reports
NUM_DRAWS = 1000         # number of random draws per event


def group_reports_by_event(rows):
    """Index ``(report_id, stitch_id, umls_id)`` rows by event.

    Returns a tuple ``(event2report, reports)``: ``event2report`` maps each
    umls_id to the set of report_ids it occurs in, and ``reports`` is the set
    of every report_id seen. The stitch_id column is not used.
    """
    event2report = dict()
    reports = set()
    for report_id, _stitch_id, umls_id in rows:
        event2report.setdefault(umls_id, set()).add(report_id)
        reports.add(report_id)
    return event2report, reports


def estimate_background(num, num_reports, num_draws=NUM_DRAWS):
    """Estimate the background frequency of an event by random sampling.

    A 0/1 vector with ``num`` ones out of ``num_reports`` entries is sampled
    without replacement ``num_draws`` times; each draw has size
    ``min(MAX_SAMPLE_SIZE, int(SAMPLE_FRACTION * num))``.

    Returns ``(mu, sd)``, the mean and standard deviation of the per-draw
    means as plain Python floats, or ``None`` when the sample size is zero
    (i.e. ``num`` is too small for a draw to contain any report).
    """
    sample_size = min(MAX_SAMPLE_SIZE, int(SAMPLE_FRACTION * num))
    if sample_size == 0:
        # The mean of an empty sample is NaN, which cannot be stored; report
        # "no estimate" explicitly instead of letting the INSERT fail later.
        return None

    report_vector = [1] * num + [0] * (num_reports - num)
    samples = [numpy.mean(random.sample(report_vector, sample_size))
               for _ in range(num_draws)]

    # Convert numpy scalars to built-in floats so the DB driver serialises
    # them as numbers.
    return float(numpy.mean(samples)), float(numpy.std(samples))


def main():
    """Compute a background (mu, sd) per event and store it in event_backgrounds.

    Reads every row of ``drug_report_event``, estimates the background for
    each umls_id, and inserts ``(event, mu, sd, num, num_reports)`` rows.
    Events with no estimate are skipped; a failed insert is reported on
    stderr and does not stop the run.
    """
    db = MySQLdb.connect(host="localhost", port=3307, user="root", passwd="enter_your_password",db="project_aers")
    try:
        c = db.cursor()

        query = """
    select report_id, stitch_id, umls_id
    from drug_report_event
    """

        c.execute(query)
        event2report, reports = group_reports_by_event(c.fetchall())
        num_reports = len(reports)

        for event, event_reports in event2report.items():
            num = len(event_reports)

            estimate = estimate_background(num, num_reports)
            if estimate is None:
                continue
            mu, sd = estimate

            try:
                # Parameterised query: the driver handles quoting of the event id.
                c.execute(
                    "insert into event_backgrounds values (%s,%s,%s,%s,%s)",
                    (event, mu, sd, num, num_reports))
            except MySQLdb.Error as err:
                # Best effort: keep going (e.g. row already present), but do
                # not hide the failure.
                sys.stderr.write(
                    "insert failed for event %s: %s\n" % (event, err))

        # MySQLdb does not autocommit; without this the inserts are lost on
        # transactional tables.
        db.commit()
        c.close()
    finally:
        db.close()


if __name__ == '__main__':
    main()
        
    
</pre></body></html>