Code
<- lm(speed ~ dist, data = cars)
obj class(obj)
RaukR 2024 • Advanced R for Bioinformatics
Marcin Kierczak
21-Jun-2024
During this lab, we will go through the most important features of 3 out of the 4 existing OOP systems in R:
While S3 and S4 are base-R OOP systems, R6 is provided by the R6
package. Both S3 and S4 follow the functional OOP style while R6
is more similar to OOP models known from Java or C++. We will not cover the Reference Classes (a.k.a. RC) which is yet another base-R OOP model. Why? Well, it is really tricky to use and its documentation is not always complete…
lm()
function?lm
class have?lm
object?lm
class implement own str()
?tibble
? You have to load the tidyverse
package and use on of the built-in datasets, e.g. mtcars
as argument to as_tibble()
to check the class.str()
function used by tibbles the default str()
?meta_frame
that is a data.frame
with additional attributes:
descr
(a description),creation
attribute which will hold the creation date.Check that it works, i.e. that it has the expected attributes and that it is a separate class.
description
field?protein
that stores the following information (you will implement the class in the next point, now just think and draw on paper if you wish):
protein
class from the previous point using the constructor, validator and helper design pattern. Try to reduce the number of input arguments to: + + a sequence and
# Constructor
new_protein <- function(sequence, length, ptm_site, ptm_type) {
stopifnot(is.character(sequence))
stopifnot(is.integer(length))
stopifnot(is.numeric(ptm_site))
stopifnot(is.character(ptm_type))
structure(
sequence,
length = length,
ptm_site = ptm_site,
ptm_type = ptm_type,
class = "protein"
)
}
# Helper
protein <- function(sequence, ptm) {
sequence <- sequence
len <- nchar(sequence)
ptm_site <- ptm[[1]]
ptm_type = ptm[[2]]
validate_protein(new_protein(sequence, len, ptm_site, ptm_type))
}
# Validator
validate_protein <- function(x) {
sequence <- x[1]
len <- attr(x, 'length')
ptm_site <- attr(x, 'ptm_site')
ptm_type <- attr(x, 'ptm_type')
if (is.na(sequence)) {
stop('Sequence is missing!', call. = FALSE)
}
if (length(ptm_site) != length(ptm_type)) {
stop('PTM type and site differ in length!', call. = FALSE)
}
if ((sum(!ptm_type %in% c('phosphorylation', 'methylation'))) > 0) {
stop('Invalid PTM types detected!', call. = FALSE)
}
return(x)
}
protein
. Is it really user friendly?print
for the protein
class. It should print e.g.:[1] "============== Protein =============="
[1] "Sequence:ARNDKLLQWYTTARD"
[1] "Length: 15 aa."
[1] "============== PTM section =============="
[1] "Site: 3" "Site: 5"
[1] "Type: phosphorylation" "Type: methylation"
print.protein <- function(x) {
sequence <- unclass(x)
len <- attr(x, 'length')
ptm_site <- attr(x, 'ptm_site')
ptm_type <- attr(x, 'ptm_type')
print("============== Protein ==============")
print(paste0("Sequence:", sequence))
print(paste0("Length: ", len, " aa."))
print("============== PTM section ==============")
print(paste0("Site: ", ptm_site))
print(paste0("Type: ", ptm_type))
}
protein
class works as it should and that generic print
works as well.protein
class in S4 (with validation!).# Generator
.protein <- setClass('protein',
slots = c(
sequence = 'character',
length = 'numeric',
ptm_site = 'numeric',
ptm_type = 'character'
)
)
# Constructor
protein <- function(sequence, ptm) {
sequence <- sequence
len <- nchar(sequence)
ptm_site <- ptm[[1]]
ptm_type <- ptm[[2]]
if (is.na(sequence)) {
stop('Sequence is missing!', call. = FALSE)
}
if (length(ptm_site) != length(ptm_type)) {
stop('PTM type and site differ in length!', call. = FALSE)
}
if ((sum(!ptm_type %in% c('phosphorylation', 'methylation'))) > 0) {
stop('Invalid PTM types detected!', call. = FALSE)
}
pt <- .protein(sequence = sequence,
length = len,
ptm_site = ptm_site,
ptm_type = ptm_type)
return(pt)
}
protein
class and check whether it works.print
using S4 and check that it works.setMethod('print', 'protein',
function(x) {
sequence <- x@sequence
len <- x@length
ptm_site <- x@ptm_site
ptm_type <- x@ptm_type
print("============== Protein ==============")
print(paste0("Sequence:", sequence))
print(paste0("Length: ", len, " aa."))
print("============== PTM section ==============")
print(paste0("Site: ", ptm_site))
print(paste0("Type: ", ptm_type))
})
print(my_prot)
ext_protein
that extends protein
with 3 slots:
.ext_protein <- setClass('ext_protein',
contains = c('protein'),
slots = c(
prot = 'protein',
feature_type = 'character',
feature_position = 'numeric',
feature_value = 'character'
))
my_ext_prot <- .ext_protein(prot = my_prot,
feature_type = 'modification',
feature_position = 11,
feature_value = 'absent'
)
class(my_ext_prot)
typeof(my_ext_prot)
str(my_ext_prot)
R6
package,protein
class using R6 model and check that it works as expected:require(R6)
protein <- R6Class(classname = 'protein',
public = list(
seq = NA,
length = NULL,
ptm_site = NA,
ptm_type = NA,
initialize = function(seq = NA, ptm = NA) {
self$seq <- seq
self$length <- nchar(self$seq)
self$ptm_site <- ptm[[1]]
self$ptm_type <- ptm[[2]]
# Check types
stopifnot(is.character(seq))
#Validate
if (is.na(self$seq)) {
stop('Sequence is missing!', call. = FALSE)
}
if (length(self$ptm_site) != length(self$ptm_type)) {
stop('PTM type and site differ in length!', call. = FALSE)
}
if ((sum(!self$ptm_type %in% c('phosphorylation', 'methylation'))) > 0) {
stop('Invalid PTM types detected!', call. = FALSE)
}
}
)
)
my_new_prot <- protein$new(seq = "ARNDKLLQWYTTARD", ptm =
list(
site = c(3, 5),
type = c('phosphorylation', 'methylation')
))
str(my_new_prot)
Congratulations! You are familiar with S3, S4 and R6 object models by now!