<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">"""
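Run the adverse-event discovery script (src/discover_ae_p4.2.py) in parallel,
one child process per drug pair, with at most `max_procs` running at once.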
"""

import os
import sys
import time
import MySQLdb
import subprocess

# Worker script started once per drug pair (path is relative to the cwd).
script = "src/discover_ae_p4.2.py"

# Directory in which the per-pair "<cid1>:<cid2>.csv" result files are
# looked up and claimed.
result_file_path = os.path.expanduser('~/Stanford/AltmanLab/aers/part4.d')

# Upper bound on simultaneously running workers.
max_procs = 5
# Popen handles of the workers that are currently tracked.
children = list()

# Connect to the local MySQL server (note the non-default port 3307).
# The password below is a placeholder and must be replaced before running.
db = MySQLdb.connect(host="localhost", port=3307, user="root", passwd="enter_your_password",db="project_aers")
c = db.cursor()

# Every drug pair with at least 10 reports, most-reported pairs first.
query = """
select stitch_id1, stitch_id2
from project_aers.pair_report_count
where report_count >= 10
order by report_count desc;
"""
try:
    c.execute(query)
    # Materialise the rows so the connection can be released right away.
    data = list(c.fetchall())
finally:
    # Nothing below talks to the database again; don't keep an idle
    # connection open for the (potentially long) lifetime of the runner.
    c.close()
    db.close()
# Canonicalise each row as (smaller id, larger id) and drop duplicates.
# First-seen order is kept on purpose: the rows arrive sorted by
# report_count descending, so the most-reported pairs are processed first
# (collapsing everything through a plain set() would lose that order).
drug_cids = []
seen_pairs = set()
for cid1, cid2 in data:
    pair = (cid1, cid2) if cid1 < cid2 else (cid2, cid1)
    # Set lookup keeps the dedup O(1) per row instead of scanning the list.
    if pair not in seen_pairs:
        seen_pairs.add(pair)
        drug_cids.append(pair)

# Scheduler: keep up to max_procs workers running until every pair has been
# dispatched AND every started worker has exited.
while drug_cids or children:

    # Fill free worker slots.  The queue check matters: pairs whose result
    # file already exists are skipped without starting a worker, so the
    # queue can run dry while slots are still free.
    while drug_cids and len(children) < max_procs:
        cid1, cid2 = drug_cids.pop(0)

        result_file = "%s/%s:%s.csv" % (result_file_path, cid1, cid2)

        # An existing result file means the pair is finished or already
        # claimed; move on to the next one.
        if os.path.exists(result_file):
            continue

        # Claim this one as in process by creating an empty placeholder.
        # Done without a shell; raises if the result directory is missing.
        open(result_file, "a").close()

        # Popen requires string arguments; the ids may come back from the
        # database as integers.
        child = subprocess.Popen(["python", script, str(cid1), str(cid2)])
        sys.stderr.write(
            "Started %s:%s for %s with pid: %s, (%d cids remaining)\n"
            % (cid1, cid2, script, child.pid, len(drug_cids)))
        children.append(child)

    # Drop workers that have exited; poll() returns None while a process is
    # still running.  Slice assignment keeps the same list object.
    children[:] = [child for child in children if child.poll() is None]

    # just to keep it from spinning a cpu
    if children:
        time.sleep(5)

# Final status line written to stderr (Python 2 print-chevron form), emitted
# once the scheduling loop above has exited.  The entity-escaped operator and
# the trailing closing tags on this line are residue of the HTML wrapper that
# opens on the first line of the file.
print &gt;&gt; sys.stderr, "Finished discovering adverse events."</pre></body></html>