Code
obj <- lm(speed ~ dist, data = cars)
class(obj)
RaukR 2024 • Advanced R for Bioinformatics
Marcin Kierczak
21-Jun-2024
During this lab, we will go through the most important features of 3 out of the 4 existing OOP systems in R: S3, S4 and R6.
While S3 and S4 are base-R OOP systems, R6 is provided by the R6 package. Both S3 and S4 follow the functional OOP style while R6 is more similar to OOP models known from Java or C++. We will not cover the Reference Classes (a.k.a. RC) which is yet another base-R OOP model. Why? Well, it is really tricky to use and its documentation is not always complete…
lm() function?
lm class have?
lm object?
lm class implement own str()?
tibble? You have to load the tidyverse package and use one of the built-in datasets, e.g. mtcars, as an argument to as_tibble() to check the class.
str() function used by tibbles the default str()?
meta_frame that is a data.frame with additional attributes:
descr (a description),
creation attribute which will hold the creation date.
Check that it works, i.e. that it has the expected attributes and that it is a separate class.
description field?protein that stores the following information (you will implement the class in the next point, now just think and draw on paper if you wish):
protein class from the previous point using the constructor, validator and helper design pattern. Try to reduce the number of input arguments to two: a sequence and a list of PTMs (sites and types).
# Constructor
# Low-level S3 constructor for the `protein` class.
#
# Arguments:
#   sequence  character vector holding the amino-acid sequence
#   length    integer sequence length
#   ptm_site  numeric vector of PTM positions
#   ptm_type  character vector of PTM types
#
# Only the argument types are checked here; semantic checks are the job of
# the validator. Returns `sequence` carrying the remaining arguments as
# attributes and with class "protein".
new_protein <- function(sequence, length, ptm_site, ptm_type) {
  # stopifnot() reports the first failing expression, so a single call gives
  # the same errors as one call per argument.
  stopifnot(
    is.character(sequence),
    is.integer(length),
    is.numeric(ptm_site),
    is.character(ptm_type)
  )

  obj <- sequence
  attr(obj, "length") <- length
  attr(obj, "ptm_site") <- ptm_site
  attr(obj, "ptm_type") <- ptm_type
  class(obj) <- "protein"
  obj
}
# Helper
# User-facing helper that builds a validated S3 `protein` object.
#
# Arguments:
#   sequence  the amino-acid sequence
#   ptm       list (or vector) whose first element holds the PTM sites and
#             whose second element holds the PTM types
#
# The sequence length is derived with nchar(), so the caller only supplies
# two arguments. The object is created by new_protein() and then passed
# through validate_protein(); the validated object is returned.
protein <- function(sequence, ptm) {
  # Fail early with a readable message instead of "subscript out of bounds"
  # when the PTM container does not hold both sites and types.
  if (length(ptm) < 2) {
    stop("`ptm` must hold two elements: PTM sites and PTM types!",
         call. = FALSE)
  }
  ptm_site <- ptm[[1]]
  ptm_type <- ptm[[2]]
  validate_protein(
    new_protein(sequence, nchar(sequence), ptm_site, ptm_type)
  )
}
# Validator
# Validator for S3 `protein` objects.
#
# Argument:
#   x  an object carrying the attributes "length", "ptm_site" and "ptm_type"
#
# Stops with an error when
#   * the first element of the sequence is NA,
#   * the PTM site and PTM type vectors differ in length,
#   * a PTM type other than 'phosphorylation' or 'methylation' is present,
#   * the "length" attribute is not a single value equal to nchar() of the
#     sequence.
# Returns `x` unchanged when every check passes.
validate_protein <- function(x) {
  sequence <- x[1]
  len <- attr(x, "length")
  ptm_site <- attr(x, "ptm_site")
  ptm_type <- attr(x, "ptm_type")

  if (is.na(sequence)) {
    stop('Sequence is missing!', call. = FALSE)
  }
  if (length(ptm_site) != length(ptm_type)) {
    stop('PTM type and site differ in length!', call. = FALSE)
  }
  if (!all(ptm_type %in% c('phosphorylation', 'methylation'))) {
    stop('Invalid PTM types detected!', call. = FALSE)
  }
  # Checked last so that the precedence of the earlier errors is unchanged.
  if (length(len) != 1L || is.na(len) || len != nchar(sequence)) {
    stop('Length attribute does not match the sequence!', call. = FALSE)
  }
  x
}
# protein. Is it really user friendly?
# print for the protein class. It should print e.g.:
# [1] "============== Protein =============="
[1] "Sequence:ARNDKLLQWYTTARD"
[1] "Length: 15 aa."
[1] "============== PTM section =============="
[1] "Site: 3" "Site: 5"
[1] "Type: phosphorylation" "Type: methylation"
# S3 print method for `protein` objects.
#
# Arguments:
#   x    a `protein` object
#   ...  accepted for compatibility with the print() generic; ignored
#
# Prints the sequence, its length and the PTM sites/types, one print() call
# per line of output, and returns `x` invisibly as print methods
# conventionally do.
print.protein <- function(x, ...) {
  sequence <- unclass(x)
  len <- attr(x, "length")
  ptm_site <- attr(x, "ptm_site")
  ptm_type <- attr(x, "ptm_type")

  print("============== Protein ==============")
  # paste0() drops the attributes still attached to `sequence`.
  print(paste0("Sequence:", sequence))
  print(paste0("Length: ", len, " aa."))
  print("============== PTM section ==============")
  print(paste0("Site: ", ptm_site))
  print(paste0("Type: ", ptm_type))
  invisible(x)
}
# protein class works as it should and that generic print works as well.
# protein class in S4 (with validation!).
# Generator
# Collects validation problems for the pieces of an S4 `protein`.
# Returns a character vector of messages (empty when everything is fine).
# A zero-length sequence is accepted here so that the empty class prototype,
# which subclasses inherit, remains a valid object.
.protein_problems <- function(sequence, ptm_site, ptm_type) {
  problems <- character(0)
  if (anyNA(sequence)) {
    problems <- c(problems, 'Sequence is missing!')
  }
  if (length(ptm_site) != length(ptm_type)) {
    problems <- c(problems, 'PTM type and site differ in length!')
  }
  if (!all(ptm_type %in% c('phosphorylation', 'methylation'))) {
    problems <- c(problems, 'Invalid PTM types detected!')
  }
  problems
}

# S4 class definition; setClass() returns the generator `.protein`.
# The validity function makes the checks apply to objects created directly
# with the generator or new(), and to validObject() calls, not only to
# objects built by the protein() constructor below.
.protein <- setClass(
  "protein",
  slots = c(
    sequence = "character",
    length = "numeric",
    ptm_site = "numeric",
    ptm_type = "character"
  ),
  validity = function(object) {
    problems <- .protein_problems(
      object@sequence, object@ptm_site, object@ptm_type
    )
    if (length(problems) > 0) problems else TRUE
  }
)
# Constructor
# Arguments:
#   sequence  the amino-acid sequence
#   ptm       list whose first element holds the PTM sites and whose second
#             element holds the PTM types
# Stops with the first validation problem found; otherwise returns a new
# S4 `protein` object whose `length` slot is nchar(sequence).
protein <- function(sequence, ptm) {
  if (length(sequence) == 0) {
    stop('Sequence is missing!', call. = FALSE)
  }
  len <- nchar(sequence)
  ptm_site <- ptm[[1]]
  ptm_type <- ptm[[2]]

  # Check before instantiating so the caller sees the plain messages rather
  # than the "invalid class ... object" wrapper produced by validObject().
  problems <- .protein_problems(sequence, ptm_site, ptm_type)
  if (length(problems) > 0) {
    stop(problems[1], call. = FALSE)
  }

  .protein(
    sequence = sequence,
    length = len,
    ptm_site = ptm_site,
    ptm_type = ptm_type
  )
}
# protein class and check whether it works.
# print using S4 and check that it works.
# S4 print method: same layout as the S3 version; `...` matches the
# signature of the print() generic and `x` is returned invisibly.
setMethod("print", "protein", function(x, ...) {
  print("============== Protein ==============")
  print(paste0("Sequence:", x@sequence))
  print(paste0("Length: ", x@length, " aa."))
  print("============== PTM section ==============")
  print(paste0("Site: ", x@ptm_site))
  print(paste0("Type: ", x@ptm_type))
  invisible(x)
})
print(my_prot)
ext_protein that extends protein with 3 slots:
# S4 class `ext_protein`; setClass() returns the generator `.ext_protein`.
# The class extends `protein` (so it inherits that class's slots) and adds
# four slots of its own: an embedded `protein` object plus the type,
# position and value of a feature.
.ext_protein <- setClass(
  "ext_protein",
  contains = "protein",
  slots = c(
    prot = "protein",
    feature_type = "character",
    feature_position = "numeric",
    feature_value = "character"
  )
)
# Instantiate the extended class around an existing protein object
# (`my_prot` must already exist in the session).
my_ext_prot <- .ext_protein(
  prot = my_prot,
  feature_type = "modification",
  feature_position = 11,
  feature_value = "absent"
)

# Inspect the result: S4 class name and underlying R type.
class(my_ext_prot)
typeof(my_ext_prot)
str(my_ext_prot)
R6 package,
protein class using R6 model and check that it works as expected:
require(R6)
# R6 implementation of the `protein` class.
#
# Public fields:
#   seq       the amino-acid sequence
#   length    nchar() of the sequence, filled in by initialize()
#   ptm_site  PTM sites (first element of `ptm`)
#   ptm_type  PTM types (second element of `ptm`)
#
# initialize(seq, ptm) validates its arguments *before* storing anything on
# `self`, so a failed construction never leaves half-initialised fields and
# the default call protein$new() reports a missing sequence instead of a
# subscript error.
protein <- R6::R6Class(
  classname = "protein",
  public = list(
    seq = NA,
    length = NULL,
    ptm_site = NA,
    ptm_type = NA,
    initialize = function(seq = NA, ptm = NA) {
      # Validate
      if (length(seq) == 0L || anyNA(seq)) {
        stop('Sequence is missing!', call. = FALSE)
      }
      # Check types
      stopifnot(is.character(seq))
      if (length(ptm) < 2L) {
        stop('PTM sites and types are missing!', call. = FALSE)
      }
      ptm_site <- ptm[[1]]
      ptm_type <- ptm[[2]]
      if (length(ptm_site) != length(ptm_type)) {
        stop('PTM type and site differ in length!', call. = FALSE)
      }
      if (!all(ptm_type %in% c('phosphorylation', 'methylation'))) {
        stop('Invalid PTM types detected!', call. = FALSE)
      }

      # Everything checked: populate the fields.
      self$seq <- seq
      self$length <- nchar(seq)
      self$ptm_site <- ptm_site
      self$ptm_type <- ptm_type
    }
  )
)
# Create an R6 protein with two PTMs: site 3 (phosphorylation) and
# site 5 (methylation).
my_new_prot <- protein$new(
  seq = "ARNDKLLQWYTTARD",
  ptm = list(
    site = c(3, 5),
    type = c("phosphorylation", "methylation")
  )
)
str(my_new_prot)
Congratulations! You are familiar with S3, S4 and R6 object models by now!