# suppressMessages(library(dplyr))
# suppressMessages(library(purrr))
suppressMessages(library(stringi))
suppressMessages(library(tidyr))
suppressMessages(library(tidyverse))
suppressMessages(library(plyranges))
suppressMessages(library(circlize))
suppressMessages(library(IRanges))
suppressMessages(library(data.table))
suppressMessages(library(grid))


# library(HiveR)
# library(rgl)

Output

1 Data visualization

Data visualization is part art and part science. The challenge is to get the art right without getting the science wrong and vice versa.

Anscombe’s quartet notwithstanding, and especially for large volumes of data, summary statistics and model estimates should be thought of as tools that we use to deliberately simplify things in a way that lets us see past a cloud of data points shown in a figure. We will not automatically get the right answer to our questions just by looking.

Tufte’s message is sometimes frustrating, but it is consistent:

Graphical excellence is the well-designed presentation of interesting data—a matter of substance, of statistics, and of design … [It] consists of complex ideas communicated with clarity, precision, and efficiency. … [It] is that which gives to the viewer the greatest number of ideas in the shortest time with the least ink in the smallest space … [It] is nearly always multivariate … And graphical excellence requires telling the truth about the data. (Tufte, 1983, p. 51).

To get more insight into data visualization and principles one should follow, please refer to following two books from where I have copied the above text.

Data Visualization: A practical introduction by Kieran Healy (https://socviz.co/lookatdata.html)
Fundamentals of Data Visualization by Claus O. Wilke (https://clauswilke.com/dataviz/)

2 Circos Plots

The circos plot capability to plot genomic data with chromosome coordinates that requires chromosom start and the end site indicating the length of the chromosome. Each chromosom will occupy a sector in the circos plot. Tracks can then be added using the coordinate within each sector (chromosome) provided and relecvant data for each value of the coordinates can be plotte for the respective track. Examples could be genes, transcripts, bases (SNPs, CpG sites) etc, each with start and the end site.

The ciclize package in R and circos algorithm in perl provide easy infrastructure to plot geneomic data.

Circular layout is very useful to represent complicated information. - it elegantly represents information with long axes or a large amount of categories - it intuitively shows data with multiple tracks focusing on the same object - it easily demonstrates relations between elements - it os aesthetically pleasing way of representing data

Circos plots provide and excellent framework to visulaize multi assay omics data. The circular layout allows us to plot several layers of data based on coordinates. Genomic coordinates are one good example where the whole genome is displayed in the form of coordintes. The genomic coordinates can then be used to stack different layers of data.

A circular layout is composed of sectors and tracks. For data in different categories, they are allocated into different sectors, and for multiple measurements on the same category, they are represented as stacked tracks from outside of the circle to the inside. The intersection of a sector and a track is called a cell (or a grid, a panel), which is the basic unit in a circular layout. It is an imaginary plotting region for drawing data points.

3 Principles of creating circos plot

The routine in making a circos plot works like

initialize layout -> create track -> add graphics -> create track -> add graphics - … -> clear

3.1 A basic example of circos plot

Let us create a small dataset and

set.seed(999)
n = 1000
df = data.frame(factors = sample(letters[1:8], n, replace = TRUE),
    x = rnorm(n), y = runif(n), z=rnorm(n))

head(df)

##   factors           x         y          z
## 1       c -0.33837650 0.2277292 -2.3816263
## 2       d  0.20128773 0.8345422 -0.3889743
## 3       e  0.60271173 0.6507941 -1.2688648
## 4       g -0.22501815 0.8613092 -1.9252602
## 5       a -0.01011359 0.3624672  0.3384133
## 6       f -0.19359724 0.5251656 -1.1476363

3.2 Initialize the circular layout

circos.par("track.height" = 0.1)
circos.initialize(factors = df$factors, x = df$x)


## create track

circos.track(factors = df$factors, y = df$y,
    panel.fun = function(x, y) {
        circos.text(CELL_META$xcenter, CELL_META$cell.ylim[2] + mm_y(5), 
            CELL_META$sector.index)
        circos.axis(labels.cex = 0.6)
})



## add graphics

circos.par("track.height" = 0.1)
circos.initialize(factors = df$factors, x = df$x)
circos.track(factors = df$factors, y = df$y,
    panel.fun = function(x, y) {
        circos.text(CELL_META$xcenter, CELL_META$cell.ylim[2] + mm_y(5), 
            CELL_META$sector.index)
        circos.axis(labels.cex = 0.6)
})
col = rep(c("#FF0000", "#00FF00"), 4)
circos.trackPoints(df$factors, df$x, df$y, col = col, pch = 16, cex = 0.5)
circos.text(-1, 0.5, "text", sector.index = "a", track.index = 1)


## Add more tracks 

bgcol = rep(c("#EFEFEF", "#CCCCCC"), 4)
circos.trackHist(df$factors, df$z, bin.size = 0.2, bg.col = bgcol, col = NA)

circos.track(ylim = c(0, 1), panel.fun = function(x, y) {
    xlim = CELL_META$xlim
    ylim = CELL_META$ylim
    breaks = seq(xlim[1], xlim[2], by = 0.1)
    n_breaks = length(breaks)
    circos.rect(breaks[-n_breaks], rep(ylim[1], n_breaks - 1),
                breaks[-1], rep(ylim[2], n_breaks - 1),
                col = rand_color(n_breaks), border = NA)
})


## Create links 

circos.link("a", 0, "b", 0, h = 0.4)
circos.link("c", c(-0.5, 0.5), "d", c(-0.5,0.5), col = "red",
    border = "blue", h = 0.2)
circos.link("e", 0, "g", c(-1,1), col = "green", border = "black", lwd = 2, lty = 2)



circos.clear()

low-level graphic functions that can be used for adding graphics.

circos.points() : adds points in a cell.
circos.lines() : adds lines in a cell.
circos.segments() : adds segments in a cell.
circos.rect()`: adds rectangles in a cell.
circos.polygon(): adds polygons in a cell.
circos.text(): adds text in a cell.
circos.axis() ands circos.yaxis(): add axis in a cell.

Following function draws links between two positions in the circle:

circos.link()

Following functions draw high-level graphics:

circos.barplot(): draw barplots. circos.boxplot(): draw boxplots. circos.violin(): draws violin plots. circos.heatmap(): draw circular heatmaps. circos.raster(): draw raster images. circos.arrow(): draw circular arrows.

Following functions arrange the circular layout.

circos.initialize(): allocates sectors on the circle. circos.track(): creates plotting regions for cells in one single track. circos.update(): updates an existed cell. circos.par(): graphic parameters. circos.info(): prints general parameters of current circular plot. circos.clear(): resets graphic parameters and internal variables.

3.3 Applications in Genomics/omics

Let us creat a bed file using the generateRandomBed() function from the “circlize” package.

3.4 Example for genomic data

3.5 circos.genomicPosTransformLines

For example, there are two sets of regions in a chromosome in which regions in one set regions are quite densely to each other and regions in other set are far from others. Heatmap or text is going to be drawn on the next track. If there is no position transformation, heatmap or text for those dense regions would be overlapped and hard to identify, also ugly to visualize. Thus, a way to transform original positions to new positions would help for the visualization.

circos.initializeWithIdeogram()
set.seed(123)
reds10 <- rand_color(10, hue = "red")
greens <- rand_color(10, hue = "green")
blues <- rand_color(10, hue = "blue")


#circos.genomicPosTransformLines(bed, posTransform = posTransform.default, horizontalLine = "top", col =reds10[1], lwd = 2)

3.6 Add heatmap to the genomic regions

The main idea for plotting data for circos plots is that one passes the data circos.genomicTrackPlotRegion() function together with a panel.function. Within the panel function argument, the relevant plotting function can be supplied togther with the relevant data. The relevant data maybe a bed file as a data frame where first three columns the file refer to the coordinates while column 4 and onwards refer to the data values to be plotted.

Here we want to plot heatmap which is done by circos.genomicRect() function that expects

circos.initializeWithIdeogram()
bed = generateRandomBed(nr = 400, nc = 8)
om = circos.par("track.margin")
oc = circos.par("cell.padding")

circos.par(track.margin = c(om[1], 0), cell.padding = c(0, 0, 0, 0))

f = colorRamp2(breaks = c(-1, 0, 1), colors = c("green", "white", "red"))

circos.genomicTrackPlotRegion(bed, stack = TRUE, panel.fun = function(coord, vals, ...) {
    circos.genomicRect(region = coord, value = vals, col = f(vals[[1]]), 
        border = f(vals[[1]]), lwd = 0.1, posTransform = posTransform.default, ...)
}, bg.border = NA, track.height = 0.1)


circos.par(track.margin = om, cell.padding = oc)
circos.clear()

Another way of plotting heatmaps is using circos.genomicHeatmap() function.

circos.initializeWithIdeogram()
bed = generateRandomBed(nr = 400, nc = 8)
om = circos.par("track.margin")
oc = circos.par("cell.padding")

circos.par(track.margin = c(om[1], 0), cell.padding = c(0, 0, 0, 0))

f = colorRamp2(breaks = c(-1, 0, 1), colors = c("green", "white", "red"))

circos.genomicTrackPlotRegion(bed, stack = TRUE, panel.fun = function(coord, vals, ...) {
    circos.genomicRect(region = coord, value = vals, col = f(vals[[1]]), 
        border = f(vals[[1]]), lwd = 0.1, posTransform = posTransform.default, ...)
}, bg.border = NA, track.height = 0.1)


circos.clear()

3.7 Points

circos.initializeWithIdeogram()

om = circos.par("track.margin")
oc = circos.par("cell.padding")

circos.par(track.margin = c(om[1], 0), cell.padding = c(0, 0, 0, 0))

f = colorRamp2(breaks = c(-1, 0, 1), colors = c("green", "white", "red"))

circos.genomicTrackPlotRegion(bed, stack = TRUE, panel.fun = function(coord, vals, ...) {
    circos.genomicRect(region = coord, value = vals, col = f(vals[[1]]), 
        border = f(vals[[1]]), lwd = 0.1, posTransform = posTransform.default, ...)
}, bg.border = NA, track.height = 0.1)


bed = generateRandomBed(nr = 900, fun = function(k) runif(k)*sample(c(-1, 1), k, replace = TRUE))

circos.genomicTrackPlotRegion(bed, ylim = c(-1, 1), panel.fun = function(coord, vals, ...) {
    col = ifelse(vals[[1]] > 0, "red", "green")
    circos.genomicPoints(region=coord, value=vals, col = col, cex = 0.5, pch = 16)
    cell.xlim = get.cell.meta.data("cell.xlim")
    for(h in c(-1, -0.5, 0, 0.5, 1)) {
        circos.lines(cell.xlim, c(h, h), col = "#00000040")
    }
}, track.height = 0.1)
circos.clear()

3.8 Add complex tracks

circos.initializeWithIdeogram()

om = circos.par("track.margin")
oc = circos.par("cell.padding")

circos.par(track.margin = c(om[1], 0), cell.padding = c(0, 0, 0, 0))

f = colorRamp2(breaks = c(-1, 0, 1), colors = c("green", "white", "red"))
bed = generateRandomBed(nr = 400, nc = 8)

circos.genomicTrackPlotRegion(bed, stack = TRUE, panel.fun = function(coord, vals, ...) {
    circos.genomicRect(region = coord, value = vals, col = f(vals[[1]]), 
        border = f(vals[[1]]), lwd = 0.1, posTransform = posTransform.default, ...)
}, bg.border = NA, track.height = 0.1)



bed = generateRandomBed(nr = 900, fun = function(k) runif(k)*sample(c(-1, 1), k, replace = TRUE))

circos.genomicTrackPlotRegion(bed, ylim = c(-1, 1), panel.fun = function(coord, vals, ...) {
    col = ifelse(vals[[1]] > 0, "red", "green")
    circos.genomicPoints(region=coord, value=vals, col = col, cex = 0.5, pch = 16)
    cell.xlim = get.cell.meta.data("cell.xlim")
    for(h in c(-1, -0.5, 0, 0.5, 1)) {
        circos.lines(cell.xlim, c(h, h), col = "#00000040")
    }
}, track.height = 0.1)


bed = generateRandomBed(nr = 900, fun = function(k) rnorm(k, 0, 50))
circos.genomicTrackPlotRegion(bed, panel.fun = function(region, value, ...) {
    x = (region[[2]] + region[[1]]) / 2
    y = value[[1]]
    loess.fit = loess(y ~ x)
    loess.predict = predict(loess.fit, x, se = TRUE)
    d1 = c(x, rev(x))
    d2 = c(loess.predict$fit + loess.predict$se.fit, rev(loess.predict$fit - loess.predict$se.fit))
    circos.polygon(d1, d2, col = "#CCCCCC", border = NA)
    circos.points(x, y, pch = 16, cex = 0.5)
    circos.lines(x, loess.predict$fit)
}, track.height = 0.1)

circos.clear()

circos.initializeWithIdeogram()
bed = generateRandomBed(nr = 400, nc = 8)

om = circos.par("track.margin")
oc = circos.par("cell.padding")

circos.par(track.margin = c(om[1], 0), cell.padding = c(0, 0, 0, 0))

f = colorRamp2(breaks = c(-1, 0, 1), colors = c("green", "white", "red"))

circos.genomicTrackPlotRegion(bed, stack = TRUE, panel.fun = function(coord, vals, ...) {
    circos.genomicRect(region = coord, value = vals, col = f(vals[[1]]), 
        border = f(vals[[1]]), lwd = 0.1, posTransform = posTransform.default, ...)
}, bg.border = NA, track.height = 0.1)



bed = generateRandomBed(nr = 900, fun = function(k) runif(k)*sample(c(-1, 1), k, replace = TRUE))

circos.genomicTrackPlotRegion(bed, ylim = c(-1, 1), panel.fun = function(coord, vals, ...) {
    col = ifelse(vals[[1]] > 0, "red", "green")
    circos.genomicPoints(region=coord, value=vals, col = col, cex = 0.5, pch = 16)
    cell.xlim = get.cell.meta.data("cell.xlim")
    for(h in c(-1, -0.5, 0, 0.5, 1)) {
        circos.lines(cell.xlim, c(h, h), col = "#00000040")
    }
}, track.height = 0.1)


bed = generateRandomBed(nr = 900, fun = function(k) rnorm(k, 0, 50))
circos.genomicTrackPlotRegion(bed, panel.fun = function(region, value, ...) {
    x = (region[[2]] + region[[1]]) / 2
    y = value[[1]]
    loess.fit = loess(y ~ x)
    loess.predict = predict(loess.fit, x, se = TRUE)
    d1 = c(x, rev(x))
    d2 = c(loess.predict$fit + loess.predict$se.fit, rev(loess.predict$fit - loess.predict$se.fit))
    circos.polygon(d1, d2, col = "#CCCCCC", border = NA)
    circos.points(x, y, pch = 16, cex = 0.5)
    circos.lines(x, loess.predict$fit)
}, track.height = 0.1)

bed_list = list(generateRandomBed(nr = 900, fun = function(k) runif(k)),
                generateRandomBed(nr = 900, fun = function(k) runif(k)))
col = c("#FF000040", "#0000FF40")
circos.genomicTrackPlotRegion(bed_list, ylim = c(-1, 1), panel.fun = function(region, value, ...) {
    i = getI(...)
    if(i == 1) {
        circos.genomicLines(region, value, area = TRUE, baseline = 0, col = "red", border = NA, ...)
    } else {
        circos.genomicLines(region, -value, area = TRUE, baseline = 0, col = "green", border = NA, ...)
    }
}, track.height = 0.1)

## Add links
region1 = generateRandomBed(nr = 1500); region1 = region1[sample(nrow(region1), 30), ]
region2 = generateRandomBed(nr = 1500); region2 = region2[sample(nrow(region2), 30), ]
circos.genomicLink(region1, region2, col = sample(30, 30, replace = TRUE))


## Highlight chromosomes, tracks and indexes


highlight.chromosome(c("chr1", "chr12"))
highlight.sector(c("chr3", "chr1"), track.index = c(2,3))
circos.clear()

4 Hiveplot

Hive plots are an alternative way of visualizing the networks where different parameters and attributes of networks can be plotted on multiple linear axis. The attibutes of nodes and edges are very similar to a network except that instead of trying to intepret intesities of colors, degree of nodes etc. in a network, one can compare network paramaters on a linear scale. An advantage of hive plots for networks is that the subjective observations of nodes and edges of network layout can be obejectively compared.

It all sounds wordy but once you see your results in prtactice, it is much easier to understand. The interpretation of values mapped to the axis can

Create Sif file
Create Gsc file Here I quote HiveR paper for intuition behind hive plots.

“HiveR was inspired by the concept of hive plots as developed by Martin Krzy- winski at the Genome Sciences Center (www.hiveplot.com). Hive plots are a reaction to “hairball” style networks in which the layout of the network is arbi- trary and hypersensitive to even small changes in the underlying network. Hive plots are particularly well-suited for comparing networks, as well as for the discovery of emergent properties of networks. The key innovation in a hive plot, compared to other means of graphically displaying network structure, is how node information is handled. In a hive plot, there is a node coordinate system consisting of two parts. First, nodes are assigned to axes based upon qualitative or quantitative characteristics of the the node, for instance membership in a certain category. As will be discussed later, this assignment process is key to constructing a hive plot. Second, the position of the node along the axis, the radius, is based upon some quantitative characteristic of the node. Edges are handled in a fairly standard way, but may be colored or have a width or weight which encodes an interesting value. In creating a hive plot, one maps network parameters to the plot, and thus the process can be readily tuned to meet one’s needs. The mappable parame- ters are listed in Table 1, and the mapping is limited only by one’s creativity and the particular knowledge domain. Thus ecologists have their own measures of food webs, social network analysts have various measures describing intercon- nectedness etc. An essential point is that mapping network parameters in this way results in a reproducible plot."

For this session you may have to install HiveR package to your current conda environment manually as it in currently present in your environment.

#install.packages("HiveR")
suppressMessages(suppressWarnings(library(HiveR)))

4.1 HiveR functionality

To explore more about the hive plot let us look at the package vignette.

browseVignettes("HiveR")

HiveR Vignette provides an overview of the package design and functions. Just like Circlize package rand hive plot data can be created using ranHiveData functions.

ranHive <- ranHiveData(nx=4)
class(ranHive)

## [1] "HivePlotData"

As we can see that ranHive data belongs to “HivePlotData” class and we can explore with sort of data and attributes it is required to have.

help("HivePlotData")

Lets explore the data we created using ranHiveData generator.

str(ranHive)

## List of 5
##  $ nodes    :'data.frame':   50 obs. of  6 variables:
##   ..$ id    : int [1:50] 2 3 4 5 6 7 8 9 10 11 ...
##   ..$ lab   : chr [1:50] "ffnkj" "ocatf" "ibckx" "ekiea" ...
##   ..$ axis  : int [1:50] 2 2 3 4 1 4 3 1 1 3 ...
##   ..$ radius: num [1:50] 34 48 66 52 66 16 51 8 66 87 ...
##   ..$ size  : num [1:50] 0.5 1.5 1 0.5 1 0.5 1.5 1.5 1.5 0.5 ...
##   ..$ color : chr [1:50] "#984EA3" "#FF7F00" "#FF7F00" "#4DAF4A" ...
##  $ edges    :'data.frame':   60 obs. of  4 variables:
##   ..$ id1   : int [1:60] 15 15 9 45 34 9 10 28 45 9 ...
##   ..$ id2   : int [1:60] 19 21 2 21 20 26 3 21 31 19 ...
##   ..$ weight: num [1:60] 2 3 1 3 2 1 3 3 3 1 ...
##   ..$ color : chr [1:60] "#377EB8" "#984EA3" "#FF7F00" "#4DAF4A" ...
##  $ type     : chr "2D"
##  $ desc     : chr "4 axes -- 50 nodes -- 60 edges"
##  $ axis.cols: chr [1:4] "#E41A1C" "#377EB8" "#4DAF4A" "#984EA3"
##  - attr(*, "class")= chr "HivePlotData"

4.2 Plot the generated dat

Let us make a plot to see an example of a hive plot.

plotHive(ranHive)

4.3 Data import

There are two ways of importing data into “HivePlotData” class. The HiveR package “dot2HPD” provides an option to import networks in the dot file format. One can read more about the file format here (https://secure.wikimedia.org/wikipedia/en/wiki/DOT_language).

A second way of creating “HivePlotData” is from your own netwrok files. Here we will first use - “E.Coli” RegulonDB provided in the HiveR package by using built in functions - Use Piano package’s output to create Hive plot from your own data by converting it to “HivePlotData” class.

 EC1 <- dot2HPD(file = "Circos_HiveR/hiveData/E_coli/E_coli_TF.dot", node.inst = NULL,
edge.inst = "Circos_HiveR/hiveData/E_coli/EdgeInst_TF.csv", desc = "E coli gene regulatory network (RegulonDB)", axis.cols = rep("grey", 3))
sumHPD(EC1)

##  E coli gene regulatory network (RegulonDB)
##  This hive plot data set contains 1597 nodes on 1 axes and 3893 edges.
##  It is a  2D data set.
## 
##      Axis 1 has 1597 nodes spanning radii from 1 to 1 
## 
##      Axes 1 and 1 share 3893 edges

EC2 <- mineHPD(EC1, option = "rad <- tot.edge.count")
sumHPD(EC2)

##  E coli gene regulatory network (RegulonDB)
##  This hive plot data set contains 1597 nodes on 1 axes and 3893 edges.
##  It is a  2D data set.
## 
##      Axis 1 has 1597 nodes spanning radii from 1 to 434 
## 
##      Axes 1 and 1 share 3893 edges

EC3 <- mineHPD(EC2, option = "axis <- source.man.sink")
sumHPD(EC3)

##  E coli gene regulatory network (RegulonDB)
##  This hive plot data set contains 1597 nodes on 3 axes and 3893 edges.
##  It is a  2D data set.
## 
##      Axis 1 has 45 nodes spanning radii from 1 to 83 
##      Axis 2 has 1416 nodes spanning radii from 1 to 11 
##      Axis 3 has 136 nodes spanning radii from 2 to 434 
## 
##      Axes 1 and 2 share 400 edges
##      Axes 1 and 3 share 21 edges
##      Axes 3 and 2 share 3158 edges
##      Axes 3 and 3 share 314 edges

EC4 <- mineHPD(EC3, option = "remove zero edge")
sumHPD(EC4)

## 
##   113 edge(s) that start and end on the same node were removed
## 
##   22 virtual self-edge(s) were removed
##  E coli gene regulatory network (RegulonDB)
##  This hive plot data set contains 1597 nodes on 3 axes and 3768 edges.
##  It is a  2D data set.
## 
##      Axis 1 has 45 nodes spanning radii from 1 to 83 
##      Axis 2 has 1416 nodes spanning radii from 1 to 11 
##      Axis 3 has 136 nodes spanning radii from 2 to 434 
## 
##      Axes 1 and 2 share 400 edges
##      Axes 1 and 3 share 21 edges
##      Axes 3 and 2 share 3158 edges
##      Axes 3 and 3 share 189 edges

4.4 Work with edges

edges <- EC4$edges
edgesR <- subset(edges, color == 'red')
edgesG <- subset(edges, color == 'green')
edgesO <- subset(edges, color == 'orange')

edges <- rbind(edgesO, edgesG, edgesR)
EC4$edges <- edges

EC4$edges$weight <- 0.5

plotHive(EC4, dr.nodes = FALSE, ch = 20,
axLabs = c("source", "sink", "manager"),
axLab.pos = c(40, 75, 35),
axLab.gpar = gpar(fontsize = 6, col = "white", lwd = 2),
arrow = c("degree", 150, 100, 180, 70), np = FALSE)
grid.text("native units", x = 0.5, y = 0.05, default.units = "npc", gp = gpar(fontsize = 8, col = "white"))

plotHive(EC4, dr.nodes = FALSE, method = "rank", ch = 100,
#axLabs = c("source", "sink", "manager"),
#axLab.pos = c(100, 125, 180),
#axLab.gpar = gpar(fontsize = 10, col = "white"),
np = FALSE)
grid.text("ranked units", x = 0.5, y = 0.05, default.units = "npc", gp = gpar(fontsize = 8, col = "white"))

plotHive(EC4, dr.nodes = FALSE, method = "norm", ch = 0.1, axLabs = c("source", "sink", "manager"),
axLab.pos = c(0.1, 0.2, 0.2), axLab.gpar = gpar(fontsize = 6, col = "white"), np = FALSE)
grid.text("normed units", x = 0.5, y = 0.05, default.units = "npc", gp = gpar(fontsize = 8, col = "white"))

4.5 Load the network data for pathways and metabolites

Gene_stats<-read.delim("Circos_HiveR/hiveData/hmrData/GLS_Palm_vs_control_rep_pathway.txt")
met_stats<-read.delim("Circos_HiveR/hiveData/hmrData/GSS_Palm_vs_control_rep_met.txt")
path_stats<-  read.delim("Circos_HiveR/hiveData/hmrData/GSS_Palm_vs_control_rep_pathway.txt")
met_gene<-read.delim("Circos_HiveR/hiveData/hmrData/GSC_Palm_vs_control_rep_met.txt", header = FALSE)
path_gene<-read.delim("Circos_HiveR/hiveData/hmrData/GSC_Palm_vs_control_rep_pathway.txt", header = FALSE)

Take a look at the loaded files.

The code below will create a data of “HivePlotData” class from the output of the Piano package in R. The code at the moment is is not fully commeneted but please have a look at the course homepage for updates. For those who are familiar with R, can try and understand it from the implemented commands.

met_gene_z<- met_gene[which(met_gene$V1 %in% met_stats$Name),]
path_gene_z<-path_gene[which(as.character(path_gene$V1) %in% as.character(path_stats$Name)),]


Gene_stats$ID1<-c(1:(dim(Gene_stats)[1]))
Gene_stats$padj<-p.adjust(Gene_stats$p, method = "fdr")
met_gene$ID2<-as.integer(as.numeric(factor(met_gene$V1)))
path_gene$ID3<-as.integer(as.numeric(factor(path_gene$V1)))

gene_met_tmp<-merge(met_gene, Gene_stats, by.x = "V2", by.y ="g" )
gene_met_tmp$ID11<-as.integer(as.numeric(factor(gene_met_tmp$V2)))
gene_met_tmp$ID11<-gene_met_tmp$ID11+max(gene_met_tmp$ID2)
gene_met_tmp<-merge(gene_met_tmp, path_gene, by.x = "V2", by.y = "V2" )
gene_met_tmp$ID31<- gene_met_tmp$ID3+max(gene_met_tmp$ID11)

met_stats_sig<-(met_stats[met_stats$p.adj..non.dir..<0.05,])
path_stats_sig<-path_stats[path_stats$p.adj..non.dir..<0.00001,]

sig_mets<-met_stats_sig$Name %>%as.character()
sig_paths<-path_stats_sig$Name%>% as.character()


gene_met_tmp_sig<-gene_met_tmp[which( as.character(gene_met_tmp$V1.x)%in% sig_mets|  as.character(gene_met_tmp$V1.y)%in% sig_paths),]

gene_met_tmp_sig <-gene_met_tmp_sig[gene_met_tmp_sig$padj<0.000001,]

#Create a node table
x_sig<-unique(c(gene_met_tmp_sig$ID2, gene_met_tmp_sig$ID11,gene_met_tmp_sig$ID31)) #nodes
x1_sig<-as.character((c(as.character(unique(gene_met_tmp_sig$V1.x)), as.character(unique(gene_met_tmp_sig$V2)),as.character(unique(gene_met_tmp_sig$V1.y))))) #labels
x2_sig<-rep(1:3, c(length(unique(gene_met_tmp_sig$V1.x)), length(unique(gene_met_tmp_sig$V2)),length(unique(gene_met_tmp_sig$V1.y)))) # assign axis

x3_sig<-  c(-log10(Gene_stats[which(x1_sig %in% Gene_stats$g),"padj"])*50, -log10(met_stats[which(x1_sig %in% met_stats$Name),"p..non.dir.."])*50,-log10(path_stats[which(path_stats$Name %in% x1_sig), "p..non.dir.."])*100 )   #radius i.e., postion on the axis is arranged by meanigful graphical parameter e.g., significance

x4_sig<-  scale(abs(c(Gene_stats[which(x1_sig %in% rownames(Gene_stats)),"myFC"], met_stats_sig[which(x1_sig %in% met_stats$Name ), "Genes..tot." ],path_stats[which(path_stats$Name %in% x1_sig), "Genes..tot." ] )), center = FALSE, scale = TRUE) #size of the node e.g., Degree of the node or fold change etc
x4_sig_a<-rep(1, length(x3_sig))

x5_sig<-  as.character(rep(c("#E41A1C" ,"#984EA3", "#4DAF4A"), c(length(unique(gene_met_tmp_sig$V1.x)), length(unique(gene_met_tmp_sig$V2)),length(unique(gene_met_tmp_sig$V1.y)))))   #Node color
  
#xx<-c(gene_stats[which(rownames(gene_stats) %in%x1),"myPval"], met_stats[which(met_stats$Name %in% x1), "p..non.dir.."],path_stats[which(path_stats$Name %in% x1), "p..non.dir.."] )

# Create and edge table
y_sig<-  c(gene_met_tmp_sig$ID2, gene_met_tmp_sig$ID11,gene_met_tmp_sig$ID2 )#ID1
y1_sig<- c(gene_met_tmp_sig$ID11, gene_met_tmp_sig$ID31,gene_met_tmp_sig$ID31)#ID2
y2_sig<- rep(2, length(y_sig)) # weight
y3_sig<- as.character(rep ("pink" , length(y_sig)))#color

HivePlotData<-list()
nodes_sig<-data.frame(x_sig,x1_sig,x2_sig,x3_sig,x4_sig_a,x5_sig)
colnames(nodes_sig)<-c("id", "lab", "axis","radius", "size", "color")
edges_sig<- data.frame(y_sig, y1_sig, y2_sig, y3_sig)
colnames(edges_sig)<- c("id1", "id2", "weight", "color")
type<- c("2D")
desc<-c("many nodes")
#axis.cols<- c("#E41A1C" ,"#377EB8", "#4DAF4A")
axis.cols<- c("blue" ,"purple", "green")

Data_hive_sig<- list(nodes_sig, edges_sig, type, desc, axis.cols)

names(Data_hive_sig)<-c("nodes", "edges", "type", "desc", "axis.cols")
Data_hive_sig$nodes$lab<-as.character(Data_hive_sig$nodes$lab)
Data_hive_sig$nodes$color<-as.character(Data_hive_sig$nodes$color)
Data_hive_sig$edges$color<-as.character(Data_hive_sig$edges$color)



class(Data_hive_sig)<-"HivePlotData"

plotHive(Data_hive_sig, bkgnd = "black",
    axLabs = c("Metabolites", "Genes", "Pathways"),
    axLab.pos = c(1, 1),
    ch = 0.5, method = "norm"
    )

Cytoscape_R

Ashfaq Ali

09/10/2020