<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">"""
Using linear models to establish the presence of class-wide interactions
"""

import os
import csv
import sys
import MySQLdb
import rpy2.robjects as robjects

c = MySQLdb.connect(host="localhost", port=3306, user="root", passwd="enter_your_password",db="project_aers").cursor()

# Get the adverse event classes
query = """
select cui, `desc`
from effect_sider.costart_top
"""
c.execute(query)

event_classes = dict()
for cui, desc in c.fetchall():
    event_classes[cui] = desc

eclasses = sys.argv[1].split(',')

for eclass in eclasses:
    
    print &gt;&gt; sys.stderr, "Evaluating interactions for %s" % event_classes[eclass]
    
    query = """
    select a.first5, b.first5
    from effect_sider.costart_top
    join effect_sider.costart_categories using (cui)
    join project_aers.offbothsides using (umls_id)
    join compound_stitch.atc2stitch a on (a.stitch_id = stitch_id1)
    join compound_stitch.atc2stitch b on (b.stitch_id = stitch_id2)
    where cui = '%s'
    # and t_statistic &gt; 5
    group by a.first5, b.first5
    having count(*) &gt;= 50
    """ % eclass
    c.execute(query)
    drug_class_pairs = c.fetchall()

    print &gt;&gt; sys.stderr, "Found %d drug_class_pairs with enough data." % len(drug_class_pairs)

    robjects.r("""
    library(RMySQL)
    mycon &lt;- dbConnect(MySQL(), user='root', dbname='project_aers', host='localhost', port=3306, password='enter_your_password')

    data &lt;- dbGetQuery(mycon, "
    select stitch_id1, stitch_id2, umls_id, a.first5 afirst5, b.first5 bfirst5, observed
    from effect_sider.costart_top
    join effect_sider.costart_categories using (cui)
    join project_aers.offbothsides using (umls_id)
    join compound_stitch.atc2stitch a on (a.stitch_id = stitch_id1)
    join compound_stitch.atc2stitch b on (b.stitch_id = stitch_id2)
    where cui = '%s'
    # and t_statistic &gt; 5
    ")
    """ % eclass )

    completed = set()

    for i, (dclass1, dclass2) in enumerate(drug_class_pairs):
        if (i+1)%100 == 0:
            print &gt;&gt; sys.stderr, "Completed %d of %d" % (i, len(drug_class_pairs))
    
        key = '%s,%s' % tuple(sorted([dclass1, dclass2]))
        if not dclass1 == dclass2 and not key in completed:
            completed.add(key)
            # evaluate interactions in pairs of top 100 drugs.
            robjects.r("""
            inA = as.factor(data$afirst5 == '%s')
            inB = as.factor(data$bfirst5 == '%s')
            """ % (dclass1, dclass2))
            num_in_a = robjects.r("sum(inA==TRUE)")[0]
            num_in_b = robjects.r("sum(inB==TRUE)")[0]
            num_in_both = robjects.r("sum(inA==TRUE &amp; inB==TRUE)")[0]
        
            if num_in_both == num_in_a or num_in_both == num_in_b:
                continue
        
            if num_in_both &gt;= 3:
                robjects.r("fit0 &lt;- lm(data$observed ~ inA + inB + inA*inB)")
                y = robjects.r("summary(fit0)")
            
                pvalues = [y[3][13],y[3][14],y[3][15]]
                estimates = [y[3][0],y[3][1],y[3][2],y[3][3]]
            
                query = """
                insert into project_aers.eval_pair_eclass_drugmean values ('%s','%s','%s', %f,%f,%f,%f, %.10e,%.10e,%.10e)
                """ % tuple([dclass1, dclass2, eclass] + estimates + pvalues)
                c.execute(query)
                # writer.writerow([dclass1, dclass2, eclass] + estimates + pvalues)
            else:
                print &gt;&gt; sys.stderr, "Only %d %s combos for %s and %s found, skipping." % (num_in_both, eclass, dclass1, dclass2)

</pre></body></html>