# -*- coding: utf-8 -*-
'''Functions for performing network propagation
'''
import networkx as nx
import numpy as np
import pandas as pd
import warnings
[docs]
def get_normalized_adjacency_matrix(graph, conserve_heat=True, weighted=False):
    """
    Returns normalized adjacency matrix (W'), as detailed in:

    Vanunu, Oron, et al. 'Associating genes and protein complexes with disease
    via network propagation.'

    With version `0.1.6` and newer, the :py:class:`networkx.Graph`
    can be directly passed into
    :py:func:`~netcoloc.netprop.get_individual_heats_matrix` and
    this method will be invoked to create the normalized adjacency matrix

    .. note::

        Resulting matrix from this function can be saved to a file with :py:func:`numpy.save`
        and loaded later with :py:func:`numpy.load`, but resulting file can be several gigabytes
        and take a minute or more to save/load.

        .. code-block:: python

            numpy.save('nam.npy', adjacency_matrix)
            adjacency_matrix = numpy.load('nam.npy')

    :param graph: Interactome from which to calculate normalized
                  adjacency matrix. A :py:class:`numpy.ndarray` is also
                  accepted and is converted with
                  :py:func:`networkx.from_numpy_array` first.
    :type graph: :py:class:`networkx.Graph` or :py:class:`numpy.ndarray`
    :param conserve_heat: If ``True``, heat will be conserved
                          (ie. the sum of the heat vector will be equal to 1),
                          and the graph will be asymmetric. Otherwise, heat will
                          not be conserved, and the graph will be symmetric.
    :type conserve_heat: bool
    :param weighted: If ``True``, then the graph's edge weights
                     will be taken into account. Otherwise, all edge weights
                     will be set to 1. If ``True`` but not every edge carries
                     a ``weight`` attribute, a warning is issued and all
                     edge weights are set to 1.
    :type weighted: bool
    :raises ValueError: If **graph** is a directed graph or a multigraph, or
                        if the normalized graph ends up with fewer nodes than
                        **graph** (nodes without any edge)
    :raises AssertionError: If **graph** contains a node with degree zero
    :return: Square normalized adjacency matrix, with rows and columns in
             the order of ``graph.nodes()``
    :rtype: :py:class:`numpy.ndarray`
    """
    if isinstance(graph, np.ndarray):
        graph = nx.from_numpy_array(graph)
    if isinstance(graph, (nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph)):
        raise ValueError("Input graph must be a networkx.Graph object. Directed and MultiGraphs are not supported.")
    # Kept as an assert so callers keep seeing AssertionError; the node-count
    # check near the end still catches isolated nodes if asserts are disabled.
    assert 0 not in dict(graph.degree).values(), "Graph cannot have nodes with degree=zero"

    # Heat-conserving normalization needs a different weight per direction,
    # hence a DiGraph; the symmetric normalization fits an undirected Graph.
    if conserve_heat:
        graph_weighted = nx.DiGraph()
    else:
        graph_weighted = nx.Graph()

    # nx.is_weighted() walks every edge of the graph, so evaluate it once
    # here instead of once per edge inside the loop below.
    use_edge_weights = False
    if weighted:
        use_edge_weights = nx.is_weighted(G=graph)
        if not use_edge_weights:
            warnings.warn("Input graph is not weighted. All edge weights will be set to 1.")

    node_to_degree_dict = dict(graph.degree)
    edge_weights = []
    for v1, v2, data in graph.edges(data=True):
        deg1 = node_to_degree_dict[v1]
        deg2 = node_to_degree_dict[v2]
        weight = data['weight'] if use_edge_weights else 1
        if conserve_heat:
            # Asymmetric weights: each directed edge u->v is normalized by
            # the degree of its source node u.
            edge_weights.append((v1, v2, weight / float(deg1)))
            edge_weights.append((v2, v1, weight / float(deg2)))
        else:
            # Single undirected edge normalized by the degree of both
            # endpoints as per Vanunu, Oron, et al. 2010
            edge_weights.append((v1, v2, weight / np.sqrt(deg1 * deg2)))

    graph_weighted.add_weighted_edges_from(edge_weights)

    # Nodes are only added to graph_weighted through their edges, so a node
    # count mismatch means the input had nodes without any edge.
    if len(graph.nodes) != len(graph_weighted):
        raise ValueError("Input graph has nodes with zero degrees. Please remove these nodes.")

    # Use the input graph's node order so callers can map rows/columns back
    w_prime = nx.to_numpy_array(graph_weighted, nodelist=graph.nodes())
    return w_prime
[docs]
def get_individual_heats_matrix(nam_or_graph, alpha=0.5,
                                conserve_heat=True, weighted=False):
    """
    Returns the pre-calculated contributions of each individual gene in the
    interactome to the final heat of each other gene in the interactome after
    propagation.

    The result is computed as ``(1 - alpha) * inv(I - alpha * W.T)`` where
    ``W`` is the normalized adjacency matrix.

    .. versionchanged:: 0.1.6
        In addition to a normalized adjacency matrix, this function
        now also supports a :py:class:`networkx.Graph` network as input

    If a :py:class:`networkx.Graph` network is passed in as the **nam_or_graph**
    parameter, the function :py:func:`~netcoloc.netprop.get_normalized_adjacency_matrix`
    is called to generate the normalized adjacency matrix using the **conserve_heat** and
    **weighted** parameters

    .. note::

        Resulting matrix from this function can be saved to a file with :py:func:`numpy.save`
        and loaded later with :py:func:`numpy.load`, but resulting file can be several gigabytes
        and take a minute or more to save/load.

        .. code-block:: python

            numpy.save('heats_matrix.npy', w_double_prime)
            w_double_prime = numpy.load('heats_matrix.npy')

    :param nam_or_graph: square normalized
                         adjacency matrix or network
    :type nam_or_graph: :py:class:`numpy.ndarray` or :py:class:`networkx.Graph`
    :param alpha: heat dissipation coefficient between 0 and 1. The
                  contribution of the heat propagated from adjacent nodes in
                  determining the final heat of a node, as opposed to the contribution
                  from being a part of the gene set initially
    :type alpha: float
    :param conserve_heat: If ``True``, heat will be conserved
                          (ie. the sum of the heat vector will be equal to 1),
                          and the graph will be asymmetric. Otherwise, heat will
                          not be conserved, and the graph will be symmetric.
                          **NOTE:** Only applies if **nam_or_graph** is :py:class:`networkx.Graph`
    :type conserve_heat: bool
    :param weighted: If ``True``, then the graph's edge weights
                     will be taken into account. Otherwise, all edge weights
                     will be set to 1.
                     **NOTE:** Only applies if **nam_or_graph** is :py:class:`networkx.Graph`
    :type weighted: bool
    :raises AssertionError: If **alpha** is outside the range 0 to 1
    :return: square individual heats matrix
    :rtype: :py:class:`numpy.ndarray`
    """
    assert 0 <= alpha <= 1, "Alpha must be between 0 and 1"

    # A graph is normalized first; anything else is taken to already be
    # the normalized adjacency matrix.
    if isinstance(nam_or_graph, nx.Graph):
        adjacency = get_normalized_adjacency_matrix(
            nam_or_graph,
            conserve_heat=conserve_heat,
            weighted=weighted,
        )
    else:
        adjacency = nam_or_graph

    transposed = np.transpose(adjacency)
    size = transposed.shape[0]

    # Closed-form propagation: invert (I - alpha * W^T) and scale by (1 - alpha)
    system = np.identity(size) - alpha * transposed
    return np.linalg.inv(system) * (1 - alpha)
[docs]
def network_propagation(individual_heats_matrix, nodes, seed_genes):
    """
    Implements network propagation, as detailed in:

    Vanunu, Oron, et al. 'Associating genes and protein complexes with
    disease via network propagation.'

    Using this function, the final heat of the network is calculated directly,
    instead of iteratively. This method is faster when many different
    propagations need to be performed on the same network (with different seed
    gene sets). It is slower than
    :py:func:`~netcoloc.netprop.iterative_network_propagation` for a
    single propagation.

    :param individual_heats_matrix: Square matrix that is the
                                    output of :py:func:`~netcoloc.netprop.get_individual_heats_matrix`
    :type individual_heats_matrix: :py:class:`numpy.ndarray`
    :param nodes: List of nodes in the network represented by the
                  individual_heats_matrix, in the same order in which they were
                  supplied to :py:func:`~netcoloc.netprop.get_individual_heats_matrix`
    :type nodes: list
    :param seed_genes: Input list of genes/nodes for initializing the heat in network propagation.
                       Any items in `seed genes` that are not present in `nodes` will be ignored,
                       and duplicated seeds are counted once.
    :type seed_genes: list
    :return: Final heat of each node after propagation, with the name
             of the nodes as the index. If none of the seed genes is present
             in `nodes`, a warning is issued and every value is ``NaN``.
    :rtype: :py:class:`pandas.Series`
    """
    # Accept any iterable of nodes, not only objects providing ``.index``
    nodes = list(nodes)

    # Remove genes that are not in network; np.intersect1d also
    # de-duplicates and sorts the remaining seeds
    seeds_in_network = np.intersect1d(nodes, seed_genes)

    if seeds_in_network.size == 0:
        # Dividing by zero seeds below would yield NaN with only a generic
        # numpy RuntimeWarning; return the same NaN result with a clear cause.
        warnings.warn("None of the seed genes are present in the network. "
                      "Returned heats are NaN.")
        return pd.Series(np.full(len(nodes), np.nan), index=nodes)

    # Map every node to the position of its first occurrence once, instead
    # of a linear ``nodes.index`` scan for every seed gene
    node_to_index = {}
    for position, node in enumerate(nodes):
        node_to_index.setdefault(node, position)

    # Initialize results vector
    heat = np.zeros(len(nodes))

    # Column j is read as the heat that node j contributes to every node;
    # summing the seed columns equals multiplying the matrix by the seed
    # indicator vector.
    for gene in seeds_in_network:
        heat += individual_heats_matrix[:, node_to_index[gene]]

    # Normalize results by number of seed genes
    heat /= len(seeds_in_network)

    # Return as pandas series indexed by node name
    return pd.Series(heat, index=nodes)