<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">
import os
import sys
import csv

# Build a lookup from the second column (iid) to the first column (cid) of
# the tab-delimited file cmap_cmap2stitch.txt.  Every row must have exactly
# four fields; the two name columns are read but not used.
# NOTE(review): judging by the names, iid is presumably a CMap instance id
# and cid a STITCH compound id -- confirm against the data.
cmap2stitch = dict()
# The context manager closes the file handle as soon as loading finishes.
with open('cmap_cmap2stitch.txt') as cmap2stitch_fh:
    for cid, iid, name1, name2 in csv.reader(cmap2stitch_fh, delimiter='\t'):
        cmap2stitch[iid] = cid

# Build a lookup from the first column (pid) to the second column (gid) of
# the tab-delimited file cmap_probe2gene.txt.  Every row must have exactly
# two fields; a later row with the same pid overwrites an earlier one.
# NOTE(review): pid/gid are presumably probe and gene identifiers -- confirm.
probe2gene = dict()
# The context manager closes the file handle as soon as loading finishes.
with open('cmap_probe2gene.txt') as probe2gene_fh:
    for pid, gid in csv.reader(probe2gene_fh, delimiter='\t'):
        probe2gene[pid] = gid

# Load the (cid, gid, source) rows of the tab-delimited file cmap_targets.txt
# into three sets:
#   targets        -- "cid,gid" strings, one per pair seen in the file
#   targets_genes  -- every gid that appears in at least one row
#   targets_drugs  -- every cid that appears in at least one row
# The third column (source) is read but not used.
targets = set()
targets_genes = set()
targets_drugs = set()
# The context manager closes the file handle as soon as loading finishes.
with open('cmap_targets.txt') as targets_fh:
    for cid, gid, source in csv.reader(targets_fh, delimiter='\t'):
        # Pairs are stored as a single "cid,gid" string; the lookups further
        # down build their keys in the same format.
        targets.add("%s,%s" % (cid, gid))
        targets_genes.add(gid)
        targets_drugs.add(cid)


# Collect, for every "cid,gid" pair listed in `targets`, the rank strings
# found in the tab-delimited file cmap_ranks.txt (columns: pid, iid, rank).
# Each row's iid is translated through cmap2stitch and its pid through
# probe2gene; rows whose iid or pid has no mapping are skipped.
ranks = dict()
# The context manager closes the file handle as soon as loading finishes.
with open('cmap_ranks.txt') as ranks_fh:
    for pid, iid, rank in csv.reader(ranks_fh, delimiter='\t'):
        try:
            key = "%s,%s" % (cmap2stitch[iid], probe2gene[pid])
        except KeyError:
            # Unmapped instance or probe: nothing to record for this row.
            continue
        if key not in targets:
            continue
        # Ranks are kept as the raw strings read from the file.
        ranks.setdefault(key, []).append(rank)

# Mean rank per collected pair (ranks converted to float here).  Every list
# in `ranks` holds at least one entry, so the division cannot be by zero.
# NOTE(review): `x` is not referenced again in the visible part of this
# script -- confirm whether it is still needed.
x = [sum(map(float, r))/len(r) for r in ranks.values()]

# Annotate every row of the comma-separated cmap100 file and write the result
# to cmaptargets.txt.  Each input row must have exactly five fields
# (gid, cid, rank, a, b); the last two are read but not written.  Each output
# row is:
#   gid, cid, rank,
#   1 if the "cid,gid" pair is in `targets` else 0,
#   1 if cid is in `targets_drugs` else 0,
#   1 if gid is in `targets_genes` else 0
# Both files are managed by the `with` statement so they are closed (and the
# output flushed) even if a malformed row raises part-way through.  The output
# file is opened first, matching the original order of side effects.
with open('../rohan_files/getting_prob_ids_to_drugs/cmapvalidation/cmaptargets.txt', 'w') as outfh, \
        open('../rohan_files/getting_prob_ids_to_drugs/cmapvalidation/cmap100') as infh:
    writer = csv.writer(outfh)
    for gid, cid, rank, a, b in csv.reader(infh):
        # Note the column order: the file lists gid before cid, whereas the
        # keys in `targets` are formatted as "cid,gid".
        value = int('%s,%s' % (cid, gid) in targets)
        writer.writerow([gid, cid, rank, value,
                         int(cid in targets_drugs), int(gid in targets_genes)])
</pre></body></html>