#!/usr/bin/env python
# encoding: utf-8
"""
build_random_reports.py

Created by Nicholas Tatonetti on 2010-09-30.
Copyright (c) 2010 Stanford University. All rights reserved.
"""

import csv
import sys
import random
import MySQLdb

# Read every (report_id, stitch_id, umls_id) row of drug_report_event into
# memory as `data`.
query = """
select report_id, stitch_id, umls_id
from drug_report_event
"""

# NOTE(review): connection settings are hard-coded; the password below is a
# placeholder that must be replaced before running.
db = MySQLdb.connect(host="localhost", port=3307, user="root", passwd="enter_your_password",db="project_aers")
try:
    c = db.cursor()
    try:
        c.execute(query)
        data = c.fetchall()
    finally:
        # The rows are fully materialised by fetchall(), so the cursor can be
        # released immediately.
        c.close()
finally:
    # The rest of the script works purely on `data`; close the connection
    # instead of holding it open for the remainder of the run.
    db.close()

# Index the fetched rows two ways:
#   reportdrugs  -- the distinct (report_id, stitch_id) pairs
#   report2event -- report_id -> set of umls_id values seen for that report
reportdrugs = set()
report2event = dict()

for report_id, stitch_id, umls_id in data:
    reportdrugs.add((report_id, stitch_id))
    # setdefault creates the per-report set the first time a report appears.
    report2event.setdefault(report_id, set()).add(umls_id)

# Randomly reassign stitch_id values across reports.
#
# The distinct (report_id, stitch_id) pairs are put in sorted order, their
# stitch_id column is copied out and shuffled, and the shuffled values are
# handed back to the reports position by position.  Sorting first fixes the
# pair order, so the outcome depends only on the state of the `random` module
# and not on set iteration order.  Because each report collects its new ids in
# a set, a report that draws the same id twice ends up with fewer distinct ids
# than it started with.
reportdrugs = list(reportdrugs)
reportdrugs.sort()
drugs = [stitch_id for _, stitch_id in reportdrugs]
random.shuffle(drugs)

random_report2drug = dict()

for (rid, _), shuffled_id in zip(reportdrugs, drugs):
    random_report2drug.setdefault(rid, set()).add(shuffled_id)

# Write the randomised table: for every report, one "report_id,stitch_id,umls_id"
# line for each combination of a shuffled stitch_id assigned to that report and
# a umls_id originally recorded for it.
#
# The with-statement guarantees the file is closed even if a lookup or write
# fails part-way through, and write() with an explicit newline produces the
# same bytes as a print redirected to the file.
with open('drug_report_events_random.csv','w') as outfh:
    for rid, cids in random_report2drug.items():
        events = report2event[rid]
        for cid in cids:
            for uid in events:
                outfh.write("%s,%s,%s\n" % (rid, cid, uid))

# load this file with mysql load data infile ...