<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">"""
Compute cross-validated AUROC values for the list of events (one result per event).
"""

import os
import sys
import csv
import MySQLdb
from namedmatrix import NamedMatrix
from pyweka import MachineLearning as ml

if __name__ == '__main__':
    # For every event that has no result yet, fit one single-feature logistic
    # model per PRR score (the "e5" score and the "p" score), cross-validate
    # both, and store the two AUROC values in eval_auroc_sider_lf_prr_e5.

    # An event is evaluated only when its query returns more than this many
    # labelled drug-event rows.
    MIN_ROWS = 10

    # Events from gold_distinct_events for which the left join onto the
    # results table yields a null num_pos, i.e. no result has been stored.
    PENDING_EVENTS_QUERY = """
    select umls_id
    from gold_distinct_events
    left join eval_auroc_sider_lf_prr_e5 using (umls_id)
    where num_pos is null
    """

    # One row per drug-event pair of the requested event that appears in
    # gold_drug_ae or likelyfalse_drug_ae; label is 1 when `gold` is set.
    # The event id is bound by the driver (%s placeholder) rather than being
    # spliced into the SQL text.
    PAIRS_QUERY = """
    select stitch_id, umls_id, e.prr, p.prr, gold is not null as label
    from pred_drug_events_e5 e
    join prop_pred_drug_events p using (stitch_id, umls_id)
    join gold_distinct_drugs using (stitch_id)
    join gold_distinct_events using (umls_id)
    left join gold_drug_ae using (stitch_id, umls_id)
    left join likelyfalse_drug_ae using (stitch_id, umls_id)
    where umls_id = %s
    and (gold is not null or `false` is not null)
    """

    # Column order: event, pAUROC, eAUROC, number of positives, number of rows.
    INSERT_QUERY = "insert into eval_auroc_sider_lf_prr_e5 values (%s,%s,%s,%s,%s)"

    # NOTE(review): connection settings are hard-coded and the password is a
    # placeholder that has to be edited before running.
    db = MySQLdb.connect(
        host="localhost",
        port=3307,
        user="root",
        passwd="enter_your_password",
        db="project_aers",
    )
    c = db.cursor()

    try:
        # Extract events
        c.execute(PENDING_EVENTS_QUERY)
        events = [row[0] for row in c.fetchall()]

        # Optional CSV output, currently disabled:
        # outfh = open('evaluate-by-event-results-e5-vs-p.csv','w')
        # writer = csv.writer(outfh)
        # writer.writerow(['event','pAUROC','eAUROC','numpos','numeg'])

        for event in events:
            nrows = c.execute(PAIRS_QUERY, (event,))

            if nrows > MIN_ROWS:
                data = c.fetchall()
                depairs = ['%s-%s' % (row[0], row[1]) for row in data]
                labels = [row[-1] for row in data]

                # Presumably a matrix with one row per drug-event pair and
                # one column per PRR score -- confirm against NamedMatrix.
                feat = NamedMatrix(None, depairs, ['ePRR', 'pPRR'])
                for i, row in enumerate(data):
                    feat[i, 0] = float(row[2])
                    feat[i, 1] = float(row[3])

                # Column 0 holds e.prr, column 1 holds p.prr; each column is
                # evaluated on its own.
                e_results = ml.Logistic(feat[:, 0], labels).cross_validate()
                p_results = ml.Logistic(feat[:, 1], labels).cross_validate()

                # Convert to plain Python numbers so the driver can bind them
                # whatever type the ML wrapper returns.
                p_auroc = float(p_results['AUROC'])
                e_auroc = float(e_results['AUROC'])
                num_pos = int(sum(labels))

                # Progress line on stderr: event pAUROC eAUROC positives rows.
                sys.stderr.write('%s %s %s %s %s\n' % (event, p_auroc, e_auroc, num_pos, nrows))

                c.execute(INSERT_QUERY, (event, p_auroc, e_auroc, num_pos, nrows))
                # MySQLdb connections start with autocommit off, so the row
                # has to be committed explicitly. Committing per event keeps
                # finished work if a later event fails, and the pending-events
                # query then skips it on the next run.
                db.commit()
                # writer.writerow([event, p_results['AUROC'], e_results['AUROC'], sum(labels), nrows])
    finally:
        # Release the cursor and the connection even when an event fails.
        c.close()
        db.close()
    
    # outfh.close()</pre></body></html>