"""Code for reading networks (e.g., protein-protein interaction
networks) and creating networkx graphs therefrom.

Steve Bagley, steven.bagley@stanford.edu, summer 2014

"""

import pandas as pd
import networkx as nx


def Read_PPI_Graph(csvfile='../data/PPI_File.csv', skiprows=7):
    """Reads protein-protein interaction file, returns graph containing
    one undirected edge for each row (relation).

    The edge endpoints are taken from the 'Gene 1 (Symbol)' and
    'Gene 2 (Symbol)' columns. The row's 'Interaction type' value is
    stored as the 'type' attribute of the edge.

    csvfile is the path of the interaction file, and skiprows is
    passed to pandas.read_csv to skip the lines that precede the
    header row. The defaults reproduce the location and layout that
    were previously hard-coded here.

    """
    df = pd.read_csv(csvfile, skiprows=skiprows)
    edges = zip(df['Gene 1 (Symbol)'],
                df['Gene 2 (Symbol)'],
                df['Interaction type'])
    return nx.Graph([(gene1, gene2, {'type': interaction})
                     for gene1, gene2, interaction in edges])


def Read_Genes(file):
    """Reads list of genes from a file, one per line.

    file is handed to pandas.read_csv with header=None, so the first
    line is treated as data rather than as a header. Returns the
    values of the single column as a plain list.

    """
    # The squeeze=True keyword of read_csv was deprecated in pandas 1.4
    # and removed in 2.0; squeezing the column axis of the parsed frame
    # is the documented replacement and yields the same Series.
    return list(pd.read_csv(file, header=None).squeeze('columns'))


def write_lists(list_of_lists, file, columns=None, index=False):
    """Writes out a list of lists to a file in csv format.

    Input, eg: [['a', 1, 2], ['b', 3, 5], ['c', 4, -1]]. Columns is
    a sequence of which columns (index from 0) to write; None or an
    empty sequence means all columns. Index controls whether to write
    the row numbers (index).

    """
    df = pd.DataFrame(list_of_lists)
    # None replaces the former mutable [] default; an explicit empty
    # sequence is still accepted and still means "all columns".
    if columns is not None and len(columns) > 0:
        # list() so that a tuple selects several columns instead of
        # being looked up by pandas as one single key.
        df = df[list(columns)]
    df.to_csv(file, index=index)


def PPI_Graph():
    """Reads genes/interactions from PPI file, builds graph, and
    returns only its largest connected component.

    Nodes (and their edges) in every smaller connected component are
    left out. The result is an independent copy. If the file yields
    no nodes at all, the empty graph is returned unchanged.

    """
    graph = Read_PPI_Graph()
    # nx.connected_component_subgraphs() stopped returning a list in
    # networkx 1.9 and was removed in 2.4, so the largest component is
    # selected explicitly instead of relying on element [0].
    components = list(nx.connected_components(graph))
    if not components:
        return graph
    largest = max(components, key=len)
    return graph.subgraph(largest).copy()


def Create_Path_Shortest(gene1, gene2, graph):
    """Returns list of all shortest paths from gene1 to gene2 in graph.
    Each path is a list of nodes (gene names) starting at gene1 and
    ending at gene2.

    Returns None when networkx reports that no path can be produced,
    eg because the genes are not connected or one of them is not a
    node of graph.

    """
    try:
        # list() stays inside the try: all_shortest_paths may be a lazy
        # generator, in which case errors surface only when consumed.
        return list(nx.all_shortest_paths(graph, gene1, gene2))
    except (nx.NetworkXException, KeyError):
        # Covers NetworkXNoPath and unknown-node errors, without
        # hiding KeyboardInterrupt or unrelated bugs as the former
        # bare except did.
        return None