We will now build 4 different networks to analyse further:
- A full association network filtered using FDR-corrected P values (<0.01). This is an unweighted network.
- The subset of positively associated features, where the correlation coefficient is used as edge weight.
- A kNN-G (k-nearest-neighbour graph) that we will generate from the expression profiles. This will be unweighted.
- A random network based on the Erdős–Rényi model, with the same number of nodes and edges as each of the networks above.

This will be the null model for our analyses. The idea is that if a property found in one of our graphs is also reproduced in a random graph, it can be explained by chance alone, and we do not need to look for other explanations. Conversely, if a property of a graph (e.g. clustering) is not found in the random network, we can assume that it does not arise in our biological network through randomness.
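As a minimal illustration of this idea, here is a sketch (using igraph's built-in Zachary karate club graph rather than our own data) comparing the global clustering coefficient of a structured network against an Erdős–Rényi graph with the same number of nodes and edges:
import igraph as ig
# A small, well-studied social network with clear community structure
g_demo = ig.Graph.Famous('Zachary')
# Erdős–Rényi null model with the same numbers of nodes and edges
g_rand = ig.Graph.Erdos_Renyi(n=g_demo.vcount(), m=g_demo.ecount())
print('clustering (observed):', round(g_demo.transitivity_undirected(), 3))
print('clustering (random):  ', round(g_rand.transitivity_undirected(), 3))
# A much higher observed value suggests the clustering is not explained by chance alone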
To run the lab, please first select the igraph kernel in the upper right corner of the notebook.
import igraph as ig
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
n_nodes = 2102
PRmatrix = pd.read_csv('/home/jovyan/lab/data/serialization/PRmatrix.tsv', sep="\t", index_col=False)
fdr_pos_mat = pd.read_csv('/home/jovyan/lab/data/serialization/fdr_pos_mat.tsv', sep="\t", index_col=False)
knnG = pd.read_csv('/home/jovyan/lab/data/serialization/knnG.tsv', sep="\t", index_col=False)
### Generates each of the graphs
#positive associations, weighted
pos_w=ig.Graph.TupleList([tuple(x) for x in fdr_pos_mat.values], directed=False, edge_attrs=['w'])
#full network, unweighted
edge_list=PRmatrix.copy().loc[PRmatrix.isin(pd.unique(fdr_pos_mat[['feat1','feat2']].values.flatten())).sum(1)==2,['feat1','feat2']].values
all_u=ig.Graph.TupleList([tuple(x) for x in edge_list], directed=False)
#knnG, unweighted
knn=ig.Graph.TupleList([tuple(x) for x in knnG.values], directed=False)
#random networks, unweighted, with the same node and edge numbers as the networks above
random_posw=ig.Graph.Erdos_Renyi(n=n_nodes, m=len(fdr_pos_mat.values), directed=False, loops=False)
random_allu=ig.Graph.Erdos_Renyi(n=n_nodes, m=len(edge_list), directed=False, loops=False)
random_knn=ig.Graph.Erdos_Renyi(n=n_nodes, m=len(knnG.values), directed=False, loops=False)
For illustration purposes we will look at a small subset of the kNN graph. Be careful when drawing the others: they have many more edges, so plotting becomes computationally heavy.
import random
import numpy as np
#random subset of knn graph for plotting
short_knn=ig.Graph.TupleList([tuple(x) for x in knnG.values[random.sample(list(np.arange(len(knnG.values))), 5000)]], directed=False)
#This plots the subsampled graph, using degree to set node size:
short_knn.vs['degree']=short_knn.degree()
short_knn.vs['degree_size']=[(x*15)/(max(short_knn.vs['degree'])) for x in short_knn.vs['degree']] #degree is scaled to a maximum size of 15 so that all nodes remain visible
layout = short_knn.layout_mds()
ig.plot(short_knn, layout=layout, vertex_color='white', edge_color='silver', vertex_size=short_knn.vs['degree_size'])
In the next table, we can see that while all networks have the same number of nodes, the number of edges varies greatly. We also see that not all of them are connected: the kNN graph and the random graphs are connected, whereas the full and positive association networks are not. When a network is not connected, we can select the k largest connected components and proceed with the analyses using them. The largest connected component is called the giant component.
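If we needed to restrict a disconnected graph to its giant component, a sketch along these lines would do it (shown for `pos_w` purely as an example; the same `clusters().giant()` call is used later when computing closeness):
# Keep only the giant (largest) connected component of a possibly disconnected graph
if not pos_w.is_connected():
    pos_w_giant = pos_w.clusters(mode='WEAK').giant()
    print(pos_w_giant.vcount(), 'of', pos_w.vcount(), 'nodes retained in the giant component')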
#function to get graph properties, takes a few minutes to run
def graph_prop(input_graph):
    ncount=input_graph.vcount()
    ecount=input_graph.ecount()
    diameter=input_graph.diameter()
    av_path=input_graph.average_path_length()
    dens=input_graph.density()
    clustering=input_graph.transitivity_undirected() #this is the global clustering coefficient
    conn=input_graph.is_connected()
    min_cut=input_graph.mincut_value()
    out=pd.DataFrame([ncount, ecount, diameter, av_path, dens, clustering, conn, min_cut],
                     index=['node_count','edge_count','diameter','av_path_length','density','clustering_coef','connected?','minimum_cut']).T
    return(out)
#summarizing graph properties
network_stats=pd.DataFrame()
for nn in [pos_w, all_u, knn, random_posw, random_allu, random_knn]:
network_stats=pd.concat([network_stats,graph_prop(nn)])
network_stats.index=['pos_w','all_u','knn','pos_w_random', 'all_u_random', 'knn_random']
network_stats
| | node_count | edge_count | diameter | av_path_length | density | clustering_coef | connected? | minimum_cut |
|---|---|---|---|---|---|---|---|---|
| pos_w | 2102 | 293544 | 8 | 2.933039 | 0.132937 | 0.71453 | False | 0.0 |
| all_u | 2102 | 402360 | 7 | 2.11979 | 0.182216 | 0.610098 | False | 0.0 |
| knn | 2102 | 420400 | 4 | 2.320016 | 0.190386 | 0.590294 | True | 203.0 |
| pos_w_random | 2102 | 293544 | 2 | 1.867063 | 0.132937 | 0.132881 | True | 232.0 |
| all_u_random | 2102 | 402360 | 2 | 1.817784 | 0.182216 | 0.182199 | True | 319.0 |
| knn_random | 2102 | 420400 | 2 | 1.809614 | 0.190386 | 0.190406 | True | 341.0 |
Questions:
- Why are the diameter and average path length lower in the case of the full network and the random network, compared to the other two networks? What about the other random networks?
- Why do you think the clustering coefficient is lower for the kNN graph compared with the other networks?
- Why is the minimum cut much larger in the random networks compared to the others?
- How do you think the selected k would influence the properties above for the kNN-G?
Centrality analysis
We'll look into different centrality measures:
- Degree - the number of neighbors of a node.
- Betweenness - measures how many shortest paths in the network pass through a node.
- Closeness - the average length of the shortest paths between a node and all other nodes.
- Eccentricity - the largest shortest-path distance from a node to any other node. Nodes with high eccentricity tend to be on the periphery.
- Eigenvector centrality - a node is more central if its neighbors are themselves central.
Degree, Betweenness, Closeness and Eigenvector centralities may additionally be computed for the positive association network by taking each edge's weight into account. For degree, for instance, this is done for each node by summing the weights of its edges, as sketched below.
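As a brief sketch, the weighted degree (also called node strength) can be obtained directly from igraph; this assumes the `pos_w` graph built above, with its edge attribute `w`:
# Weighted degree (strength): for each node, sum the weights of its incident edges
weighted_degree = pos_w.strength(weights='w')
unweighted_degree = pos_w.degree()
# Compare both measures for the first few nodes
print(list(zip(pos_w.vs['name'][:3], unweighted_degree[:3], weighted_degree[:3])))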
Because the number of shortest paths in a network scales with the network size, we normalize Eccentricity and Betweenness with respect to the network size so that they can be compared between the networks above.
Note that many other centrality metrics can be computed. For instance, PageRank and HITS take into account edge directionality to compute what are the most central nodes in a network.
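These are available directly in igraph; a sketch only, since our graphs are undirected and edge directionality is therefore ignored here:
# PageRank and HITS hub/authority scores (purely illustrative on an undirected graph)
pr = knn.pagerank()
hubs = knn.hub_score()
authorities = knn.authority_score()
print(round(max(pr), 4), round(max(hubs), 4), round(max(authorities), 4))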
Degree distribution
Let's start by comparing the degrees of the random networks against the three other networks. From the figures below, it seems that there is no relationship between the degrees of the random networks and any of the three others.
# TODO: to/from json
degree_data = {
'nodes': list(range(n_nodes)),
'degree_pos_w': pos_w.degree(),
'degree_all_u': all_u.degree(),
'degree_knn': knn.degree(),
'degree_random_posw': random_posw.degree(),
'degree_random_allu': random_allu.degree(),
'degree_random_knn': random_knn.degree(),
}
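For a quick visual impression of these degree distributions, here is a sketch that assumes matplotlib is available in the environment (the full plots are in part 1):
import matplotlib.pyplot as plt
# Degree histograms: a real network (knn) vs its Erdős–Rényi counterpart
plt.hist(degree_data['degree_knn'], bins=50, alpha=0.5, label='knn')
plt.hist(degree_data['degree_random_knn'], bins=50, alpha=0.5, label='knn_random')
plt.xlabel('degree')
plt.ylabel('number of nodes')
plt.legend()
plt.show()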
Centrality
We will now compare different centrality metrics between the graphs.
import numpy as np
graphs = {"pos_w": pos_w, "all_u": all_u, "knn": knn, "random_posw": random_posw, "random_allu": random_allu, "random_knn": random_knn}
centralities_list=['degree (larger ~ central)','eccentricity (smaller ~ central)','betweenness (larger ~ central)', 'closeness (larger ~ central)','eigenvector (larger ~ central)']
def compute_all_centralities(graph, graph_name):
"""Computes centrality metrics for a graph.
"""
deg = graph.degree(loops=False)
node_n = graph.vcount()
# scaled to account for network size
ecc = [(2*x / ((node_n-1)*(node_n-2))) for x in graph.eccentricity()]
btw = [(2*x / ((node_n-1)*(node_n-2))) for x in graph.betweenness(directed=False)]
eig = graph.eigenvector_centrality(directed=False, scale=False)
# For disconnected graphs, computes closeness from the largest connected component
if(graph.is_connected()):
cls = graph.closeness(normalized=True)
else:
cls = graph.clusters(mode='WEAK').giant().closeness(normalized=True)
df = pd.DataFrame([deg, ecc, btw, cls, eig], index = centralities_list).T
df['graph'] = graph_name
df = df.loc[:, np.append(['graph'],
df.columns[df.columns!='graph'])] # reorder columns
return df
network_centralities_raw=pd.DataFrame()
for index, (graph_name, graph) in enumerate(graphs.items()):
df = compute_all_centralities(graph, graph_name)
##Adds centralities for each node in the network
graph.vs['degree'] = df['degree (larger ~ central)']
graph.vs['eccentricity'] = df['eccentricity (smaller ~ central)']
graph.vs['betweenness'] = df['betweenness (larger ~ central)']
graph.vs['closeness'] = df['closeness (larger ~ central)']
graph.vs['eigenvector'] = df['eigenvector (larger ~ central)']
network_centralities_raw = pd.concat([network_centralities_raw, df])
network_centralities_raw.head()
| | graph | degree (larger ~ central) | eccentricity (smaller ~ central) | betweenness (larger ~ central) | closeness (larger ~ central) | eigenvector (larger ~ central) |
|---|---|---|---|---|---|---|
| 0 | pos_w | 2.0 | 4.532989e-07 | 4.532989e-07 | 0.259888 | 9.721586e-22 |
| 1 | pos_w | 1.0 | 9.065978e-07 | 0.000000e+00 | 0.282556 | 9.328102e-22 |
| 2 | pos_w | 1.0 | 9.065978e-07 | 0.000000e+00 | 0.325719 | 9.328102e-22 |
| 3 | pos_w | 4.0 | 2.719793e-06 | 0.000000e+00 | 0.296716 | 6.649338e-09 |
| 4 | pos_w | 19.0 | 2.266494e-06 | 2.620360e-04 | 0.294630 | 1.892555e-07 |
network_centralities_raw.to_csv("/home/jovyan/lab/data/serialization/network_centralities_raw.csv", sep = "\t", index = False)
n_nodes * 6  # sanity check: expected row count of network_centralities_raw (6 graphs * n_nodes)
12612
Let's now compute centrality ranks so that we can compare them within and between networks.
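To make the ranking step concrete, here is a tiny sketch of how percentile ranks behave, including the inversion used for eccentricity (the values are made up for illustration):
# Percentile ranks: a higher rank percentile ~ more central
demo = pd.DataFrame({'degree': [1, 5, 3], 'eccentricity': [4, 2, 3]}, index=['n1', 'n2', 'n3'])
demo['degree_rank'] = demo['degree'].rank(pct=True, ascending=True)               # larger degree -> higher rank
demo['eccentricity_rank'] = demo['eccentricity'].rank(pct=True, ascending=False)  # smaller eccentricity -> higher rank
print(demo)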
full_centralities=pd.DataFrame()
for net in [0,1,2]:
net_in=[pos_w, all_u, knn][net]
net_nm=['pos_w', 'all_u', 'knn'][net]
temp=pd.DataFrame([net_in.vs[att] for att in ['name','degree','betweenness', 'closeness','eccentricity','eigenvector']], index=['name','degree','betweenness', 'closeness','eccentricity','eigenvector']).T
temp.columns=[x+'|'+net_nm for x in temp.columns]
temp.rename(columns={'name|'+net_nm:'name'}, inplace=True)
    ## For all but eccentricity, we compute the rank in ascending mode,
    ## so that a higher rank means more central. We reverse this for eccentricity.
    temp.loc[:,temp.columns.str.contains('deg|bet|clos|eig')]=temp.loc[:,temp.columns.str.contains('deg|bet|clos|eig')].rank(pct=True, ascending=True)
    temp.loc[:,temp.columns.str.contains('eccentricity')]=temp.loc[:,temp.columns.str.contains('eccentricity')].rank(pct=True, ascending=False)
temp['median_centrality|'+net_nm]=temp.loc[:,temp.columns!='name'].median(1)
if(net==0):
full_centralities=temp
else:
full_centralities=pd.merge(full_centralities, temp, on='name')
full_centralities.set_index('name', inplace=True)
#full_centralities=pd.merge(full_centralities, data[['Type']], left_index=True, right_index=True, how='left')
full_centralities['median|ALL']=full_centralities.loc[:,full_centralities.columns.str.contains('median')].median(1)
### Correlations are computed between ranks, after inverting the rank for eccentricity
def correlations_centralities(graph_name):
"""
    Returns the square matrix of Spearman rank correlations between the centrality ranks of `graph_name`.
"""
temp_corr=full_centralities.copy().loc[:,full_centralities.columns!='Type'].dropna().astype('float')
temp_corr=temp_corr.loc[:,temp_corr.columns.str.contains(graph_name)]
temp_corr.columns=temp_corr.columns.str.replace('\|.+','')
temp_corr=temp_corr.corr(method='spearman')
np.fill_diagonal(temp_corr.values, np.nan)
return(temp_corr)
all_u_centcorr=correlations_centralities('all_u')
knn_centcorr=correlations_centralities('knn')
pos_w_centcorr=correlations_centralities('pos_w')
all_u_centcorr
| | degree\|all_u | betweenness\|all_u | closeness\|all_u | eccentricity\|all_u | eigenvector\|all_u | median_centrality\|all_u |
|---|---|---|---|---|---|---|
| degree\|all_u | NaN | 0.566437 | 0.660516 | 0.361403 | 0.949016 | 0.945971 |
| betweenness\|all_u | 0.566437 | NaN | 0.332536 | 0.355912 | 0.507816 | 0.627798 |
| closeness\|all_u | 0.660516 | 0.332536 | NaN | 0.221382 | 0.726121 | 0.763778 |
| eccentricity\|all_u | 0.361403 | 0.355912 | 0.221382 | NaN | 0.360812 | 0.440496 |
| eigenvector\|all_u | 0.949016 | 0.507816 | 0.726121 | 0.360812 | NaN | 0.950041 |
| median_centrality\|all_u | 0.945971 | 0.627798 | 0.763778 | 0.440496 | 0.950041 | NaN |
all_u_centcorr.to_csv("/home/jovyan/lab/data/serialization/all_u_centcorr.csv", sep = "\t", index = True)
knn_centcorr.to_csv("/home/jovyan/lab/data/serialization/knn_centcorr.csv", sep = "\t", index = True)
pos_w_centcorr.to_csv("/home/jovyan/lab/data/serialization/pos_w_centcorr.csv", sep = "\t", index = True)
full_centralities.columns.values
array(['degree|pos_w', 'betweenness|pos_w', 'closeness|pos_w', 'eccentricity|pos_w', 'eigenvector|pos_w', 'median_centrality|pos_w', 'degree|all_u', 'betweenness|all_u', 'closeness|all_u', 'eccentricity|all_u', 'eigenvector|all_u', 'median_centrality|all_u', 'degree|knn', 'betweenness|knn', 'closeness|knn', 'eccentricity|knn', 'eigenvector|knn', 'median_centrality|knn', 'median|ALL'], dtype=object)
Visualize central nodes in graph plots
knn_centralities=full_centralities.loc[:,full_centralities.columns.str.contains('knn')]
knn_centralities=knn_centralities.loc[:,knn_centralities.columns!='median_centrality|knn']
##top nodes based on eccentricity
knn_top_ecc=knn_centralities['eccentricity|knn'].sort_values(ascending=False).index.values[:1]
##top nodes based on degree
knn_top_deg=knn_centralities['degree|knn'].sort_values(ascending=False).index.values[:1]
##random nodes to help visualize
##warning, do not increase this value much higher than 500, or you may have problems rendering this image
knn_others=knn_centralities.sample(500).index.values
## full list
node_list=np.append(np.append(knn_top_deg, knn_top_ecc), knn_others)
## subsets nodes
knn_to_draw=knn.subgraph(knn.vs.select([x.index for x in knn.vs if x['name'] in node_list]))
knn_to_draw.vs['color']=['red' if x['name'] in knn_top_deg else 'green' if x['name'] in knn_top_ecc else 'white' for x in knn_to_draw.vs]
layout = knn_to_draw.layout_auto()
ig.plot(knn_to_draw, layout=layout, vertex_color=knn_to_draw.vs['color'], edge_color='silver', vertex_size=7)
Plots and questions
You can return now, or later, to part 1 for plots and questions!
Community analysis
Node communities may be identified based on different metrics, including Modularity or Density. We will look at community detection through modularity.
Modularity of a small graph
Recall that the Modularity of a partition into communities is given by
$$Q = \sum_{c}\left[\frac{e_c}{m} - \left(\frac{K_c}{2m}\right)^2\right]$$
where $e_c$ is the number of edges within community $c$, $K_c$ is the sum of the degrees of its nodes, $m$ is the total number of edges in the network, and $\left(\frac{K_c}{2m}\right)^2$ is the expected fraction of edges falling within the community. Summing over all node pairs instead, this corresponds to
$$Q = \frac{1}{2m} \sum_{ij}\left[A_{ij} - \frac{k_i k_j}{2m}\right]\delta(c_i,c_j)$$
where $A_{ij}$ is the adjacency between nodes $i$ and $j$, $k_i$ and $k_j$ are their degrees, and $\delta(c_i,c_j)$ is the Kronecker delta, defined as 1 if nodes $i$ and $j$ are in the same community, or 0 if they are not. Let's examine the following small network, with communities given by the two colors: pink [A,B,C,D,H] and cyan [E,F,G].
g = ig.Graph([(0,1), (0,2), (1,2), (0,3), (2,3), (1,3), (2,4), (4,5), (5,6), (4,6), (4,7), (5,7), (6,7)])
g.vs["name"] = ["A", "B", "C", "D", "E", "F", "G","H"]
g.vs["module"] = ["f", "f", "f", "f", "m", "m","m", "f"]
layout = g.layout_circle()
g.vs["label"] = g.vs["name"]
color_dict = {"m": "cyan", "f": "pink"}
g.vs["color"] = [color_dict[module] for module in g.vs["module"]]
ig.plot(g, layout = layout, bbox = (300, 300), margin = 50)
For this network, we have the following adjacency matrix.
pd.DataFrame( g.get_adjacency(), index=g.vs["name"], columns=g.vs["name"] )
| | A | B | C | D | E | F | G | H |
|---|---|---|---|---|---|---|---|---|
| A | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 |
| B | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 |
| C | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 0 |
| D | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| E | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 |
| F | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
| G | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 |
| H | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 |
We will compute the modularity of this network given the 2 communities above, herein identified as communities 1 and 2.
def modularity(graph, membership):
"""
Computes the modularity of `graph` for a given module `membership`.
"""
    m=graph.ecount() #edge number
    A=pd.DataFrame(graph.get_adjacency()) #adjacency matrix
    Q=[]
    for i in A.index:
        for j in A.columns:
            if(membership[i]==membership[j]):
                deltaij=1
            else:
                deltaij=0
            Q=np.append(Q, (A.loc[i,j]-(graph.vs[i].degree()*graph.vs[j].degree())/(2*m))*deltaij)
    Q=(1/(2*m))*np.sum(Q)
    return(Q)
modularity(g, [1,1,1,1,2,2,2,1])
np.float64(0.16568047337278102)
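We can verify this value by hand. With the coloring above, community 1 is {A, B, C, D, H} and community 2 is {E, F, G}, and the graph has $m = 13$ edges. Community 1 contains $e_1 = 6$ internal edges and has degree sum $K_1 = 3+3+4+3+3 = 16$; community 2 contains $e_2 = 3$ internal edges and has degree sum $K_2 = 4+3+3 = 10$. Then
$$Q = \left[\frac{6}{13} - \left(\frac{16}{26}\right)^2\right] + \left[\frac{3}{13} - \left(\frac{10}{26}\right)^2\right] \approx 0.0828 + 0.0828 \approx 0.1657$$
which matches the value returned by the function.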
As this is a very small network, we can take a brute-force approach and examine all possible membership assignments to find the one that yields the highest modularity.
import itertools
all_memberships=[x for x in itertools.product([1,2], repeat=8)]
membership_modularity=pd.DataFrame()
for membership in all_memberships:
group1=[g.vs['name'][x] for x in range(len(g.vs['name'])) if membership[x]==1]
group2=[g.vs['name'][x] for x in range(len(g.vs['name'])) if membership[x]==2]
out=pd.Series([group1, group2, membership, modularity(g, membership)], index=['comm1', 'comm2','memb','Q'])
membership_modularity=pd.concat([membership_modularity, out], axis = 1)
membership_modularity=membership_modularity.T.sort_values('Q', ascending=False)
membership_modularity.head(10)
| | comm1 | comm2 | memb | Q |
|---|---|---|---|---|
| 0 | [A, B, C, D] | [E, F, G, H] | (1, 1, 1, 1, 2, 2, 2, 2) | 0.423077 |
| 0 | [E, F, G, H] | [A, B, C, D] | (2, 2, 2, 2, 1, 1, 1, 1) | 0.423077 |
| 0 | [A, B, C, D, E] | [F, G, H] | (1, 1, 1, 1, 1, 2, 2, 2) | 0.221893 |
| 0 | [A, B, D] | [C, E, F, G, H] | (1, 1, 2, 1, 2, 2, 2, 2) | 0.221893 |
| 0 | [F, G, H] | [A, B, C, D, E] | (2, 2, 2, 2, 2, 1, 1, 1) | 0.221893 |
| 0 | [C, E, F, G, H] | [A, B, D] | (2, 2, 1, 2, 1, 1, 1, 1) | 0.221893 |
| 0 | [A, B, C, D, H] | [E, F, G] | (1, 1, 1, 1, 2, 2, 2, 1) | 0.16568 |
| 0 | [B, C, D] | [A, E, F, G, H] | (2, 1, 1, 1, 2, 2, 2, 2) | 0.16568 |
| 0 | [D, E, F, G, H] | [A, B, C] | (2, 2, 2, 1, 1, 1, 1, 1) | 0.16568 |
| 0 | [A, E, F, G, H] | [B, C, D] | (1, 2, 2, 2, 1, 1, 1, 1) | 0.16568 |
We can see that the top 2 hits maximizing modularity correspond to the same partition, with communities [A,B,C,D] and [E,F,G,H]. For larger networks we cannot use a brute-force approach; instead we rely on heuristics such as the two-phase Louvain algorithm, which has since been improved upon by the Leiden algorithm.
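As a quick sketch of such a heuristic, python-igraph bundles a Louvain ('multilevel') implementation, which on this toy graph should recover the same optimal partition found by brute force:
# Louvain (multilevel) community detection on the small example graph
louvain_comm = g.community_multilevel()
print(louvain_comm.membership)                          # expected: [0, 0, 0, 0, 1, 1, 1, 1] (labels may be swapped)
print(round(g.modularity(louvain_comm.membership), 3))  # should be ~0.423, as found by brute force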
Modularity of gene-metabolite networks
Below, we perform the community analysis on the 4 networks. We will perform one additional community analysis by taking into account the edge weights of the positive association network. Importantly, this method searches for the largest possible communities in our network, which may not always be desired. Alternative models such as the Constant Potts Model allow us to identify smaller communities (a sketch follows below). If we know that our data contains particular feature classes, we can check whether the communities recover those classes by examining them individually, increasing the resolution if needed.
- We will use the leidenalg package, https://leidenalg.readthedocs.io/en/stable/install.html
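As a brief sketch, this is how the Constant Potts Model could be used instead, with a resolution parameter controlling community sizes (the value 0.05 is arbitrary here and would need tuning for our data):
import leidenalg
# CPM partition: a higher resolution_parameter yields more, smaller communities
knn_cpm = leidenalg.find_partition(knn, leidenalg.CPMVertexPartition, resolution_parameter=0.05)
print(len(knn_cpm), 'communities found')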
import leidenalg
pos_comm = leidenalg.find_partition(pos_w, leidenalg.ModularityVertexPartition)
pos_w_comm = leidenalg.find_partition(pos_w, leidenalg.ModularityVertexPartition, weights='w')
all_comm = leidenalg.find_partition(all_u, leidenalg.ModularityVertexPartition)
knn_comm = leidenalg.find_partition(knn, leidenalg.ModularityVertexPartition)
# partitions of the random graphs (assigned to new names so the graph objects are not overwritten)
random_all_comm = leidenalg.find_partition(random_allu, leidenalg.ModularityVertexPartition)
random_posw_comm = leidenalg.find_partition(random_posw, leidenalg.ModularityVertexPartition)
random_knn_comm = leidenalg.find_partition(random_knn, leidenalg.ModularityVertexPartition)
Predictably, we can see that the modularity score of each of the networks is substantially larger than that of the corresponding random network.
np.round(pos_comm.modularity,3)
np.float64(0.591)
np.round(pos_w_comm.modularity,3)
np.float64(0.591)
np.round(all_comm.modularity,3)
np.float64(0.427)
np.round(knn_comm.modularity,3)
np.float64(0.591)
np.round(random_all_comm.modularity,3)
np.float64(0.039)
np.round(random_posw_comm.modularity,3)
np.float64(0.047)
np.round(random_knn_comm.modularity,3)
np.float64(0.038)
Comparing the different communities by size:
def get_community_table():
comm_counts=pd.DataFrame()
feat_lists=pd.DataFrame()
for i in [0,1,2,3]:
graph=[pos_w,pos_w,all_u,knn][i]
comm=[pos_comm,pos_w_comm,all_comm,knn_comm][i]
name=['pos','pos_w','all','knn'][i]
temp=pd.DataFrame(list(zip(graph.vs['name'],[x+1 for x in comm.membership]))).rename(columns={0:'feat',1:'community'})
counts=pd.DataFrame(temp.groupby('community')['feat'].agg(len))
counts.columns=[name]
comm_counts=pd.concat([comm_counts, counts], axis = 1)
gl=pd.DataFrame(temp.groupby('community')['feat'].apply(list)).reset_index()
gl['community']=['c'+str(i) for i in gl['community']]
gl['network']=name
gl=gl.loc[:,['network','community','feat']]
feat_lists=pd.concat([feat_lists, gl])
comm_counts.index=['c'+str(i) for i in comm_counts.index]
return([comm_counts,feat_lists])
comm_counts, feat_lists = get_community_table()
feat_lists.head()
| | network | community | feat |
|---|---|---|---|
| 0 | pos | c1 | [MGST3, DTWD1, INTS8, ADIPOR2, VGLL3, PIAS2, N... |
| 1 | pos | c2 | [SUMO2, HNRNPA2B1, LEPROT, RPS3A, ANXA1, RPS15... |
| 2 | pos | c3 | [C3_accarnitines, APLP2, HSP90AA1, RAP1A, MTDH... |
| 3 | pos | c4 | [C2_accarnitines, Arg_aminoacids, His_aminoaci... |
| 4 | pos | c5 | [MT-ND2, MT-ATP6, MT-ND5, MT-ND4, MT-CO2, MT-A... |
comm_counts
| | pos | pos_w | all | knn |
|---|---|---|---|---|
| c1 | 907 | 902 | 686.0 | 539.0 |
| c2 | 555 | 556 | 653.0 | 487.0 |
| c3 | 507 | 506 | 613.0 | 462.0 |
| c4 | 119 | 122 | 145.0 | 347.0 |
| c5 | 9 | 11 | 3.0 | 267.0 |
| c6 | 3 | 3 | 2.0 | NaN |
| c7 | 2 | 2 | NaN | NaN |
feat_lists.to_csv("/home/jovyan/lab/data/serialization/feat_lists.csv", sep = "\t", index = False)
comm_counts.to_csv("/home/jovyan/lab/data/serialization/comm_counts.csv", sep = "\t", index = True)
# RUN on: igraph
degree_data['all_u_names'] = list(all_u.vs['name'])
import json
with open('/home/jovyan/lab/data/serialization/degree_data.json', 'w') as file:
json.dump(degree_data, file)