Code
# Fit a linear model on the built-in `cars` data, then inspect the class
# attribute of the object returned by lm().
obj <- lm(speed ~ dist, data = cars)
class(obj)
RaukR 2025 • R Beyond the Basics
Marcin Kierczak
08-May-2025
During this lab, we will go through the most important features of 3 out of the 4 existing OOP systems in R: S3, S4 and R6.
While S3 and S4 are base-R OOP systems, R6 is provided by the R6
package. Both S3 and S4 follow the functional OOP style while R6
is more similar to OOP models known from Java or C++. We will not cover the Reference Classes (a.k.a. RC) which is yet another base-R OOP model. Why? Well, it is really tricky to use and its documentation is not always complete…
lm()
function?lm
class have?lm
object?lm
class implement own str()
?tibble
? You have to load the tidyverse
package and use one of the built-in datasets, e.g. mtcars
as argument to as_tibble()
to check the class.str()
function used by tibbles the default str()
?meta_frame
that is a data.frame
with additional attributes:
descr
(a description),creation
attribute which will hold the creation date.Check that it works, i.e. that it has the expected attributes and that it is a separate class.
description
field?protein
that stores the following information (you will implement the class in the next point, now just think and draw on paper if you wish):
protein
class from the previous point using the constructor, validator and helper design pattern. Try to reduce the number of input arguments to: + + a sequence and
# Constructor
# Low-level builder for S3 objects of class "protein".
# Only the types of the four inputs are checked here (character, integer,
# numeric, character); nothing about their content is verified.
#   sequence - character vector that becomes the object's data
#   length   - integer stored in the "length" attribute
#   ptm_site - numeric vector stored in the "ptm_site" attribute
#   ptm_type - character vector stored in the "ptm_type" attribute
# Returns `sequence` carrying the three attributes and class "protein".
new_protein <- function(sequence, length, ptm_site, ptm_type) {
  # One stopifnot() call; the conditions are tested in the listed order.
  stopifnot(
    is.character(sequence),
    is.integer(length),
    is.numeric(ptm_site),
    is.character(ptm_type)
  )
  obj <- sequence
  attr(obj, "length") <- length
  attr(obj, "ptm_site") <- ptm_site
  attr(obj, "ptm_type") <- ptm_type
  class(obj) <- "protein"
  obj
}
# Helper
# User-facing entry point for the S3 "protein" class.
#   sequence - passed through to new_protein() as the sequence
#   ptm      - its first element is used as the PTM sites, its second
#              element as the PTM types
# The sequence length is derived with nchar(); the object is built by
# new_protein() and the result of validate_protein() is returned.
protein <- function(sequence, ptm) {
  # Derive everything up front so failures surface before construction.
  n_residues <- nchar(sequence)
  sites <- ptm[[1]]
  kinds <- ptm[[2]]
  candidate <- new_protein(sequence, n_residues, sites, kinds)
  validate_protein(candidate)
}
# Validator
# Content checks for an object built by the "protein" constructor.
#   x - object whose first element is the sequence and which carries the
#       "ptm_site" and "ptm_type" attributes
# Stops with an error when the first element of `x` is NA, when the two
# PTM attributes differ in length, or when any PTM type is something other
# than "phosphorylation" / "methylation". Otherwise returns `x` unchanged.
validate_protein <- function(x) {
  allowed_types <- c("phosphorylation", "methylation")
  sites <- attr(x, "ptm_site")
  kinds <- attr(x, "ptm_type")

  if (is.na(x[1])) {
    stop("Sequence is missing!", call. = FALSE)
  }
  if (length(sites) != length(kinds)) {
    stop("PTM type and site differ in length!", call. = FALSE)
  }
  if (!all(kinds %in% allowed_types)) {
    stop("Invalid PTM types detected!", call. = FALSE)
  }
  x
}
protein
. Is it really user friendly?print
for the protein
class. It should print e.g.:
[1] "============== Protein =============="
[1] "Sequence:ARNDKLLQWYTTARD"
[1] "Length: 15 aa."
[1] "============== PTM section =============="
[1] "Site: 3" "Site: 5"
[1] "Type: phosphorylation" "Type: methylation"
# S3 print method for objects of class "protein".
#   x   - object carrying the "length", "ptm_site" and "ptm_type" attributes
#   ... - accepted so the signature matches the print() generic; unused
# Prints a header, the sequence and its length, then a PTM section with
# one "Site: " and one "Type: " entry per PTM.
# Returns `x` invisibly, as print methods are expected to.
print.protein <- function(x, ...) {
  # unclass() so the sequence is handled as a plain character vector.
  sequence <- unclass(x)
  len <- attr(x, "length")
  ptm_site <- attr(x, "ptm_site")
  ptm_type <- attr(x, "ptm_type")
  print("============== Protein ==============")
  print(paste0("Sequence:", sequence))
  print(paste0("Length: ", len, " aa."))
  print("============== PTM section ==============")
  print(paste0("Site: ", ptm_site))
  print(paste0("Type: ", ptm_type))
  invisible(x)
}
protein
class works as it should and that generic print
works as well.protein
class in S4 (with validation!).
# Generator
# S4 class "protein" and its generator function `.protein`.
# Slots:
#   sequence - character
#   length   - numeric
#   ptm_site - numeric
#   ptm_type - character
# The validity function makes the class itself enforce its rules, so
# objects created through the generator or new() are checked as well:
#   * the sequence must not contain NA,
#   * ptm_site and ptm_type must have the same length,
#   * every ptm_type must be "phosphorylation" or "methylation".
# Empty slots pass all three checks, so a subclass may leave them unset.
.protein <- setClass("protein",
  slots = c(
    sequence = "character",
    length = "numeric",
    ptm_site = "numeric",
    ptm_type = "character"
  ),
  validity = function(object) {
    problems <- character(0)
    if (anyNA(object@sequence)) {
      problems <- c(problems, "Sequence is missing!")
    }
    if (length(object@ptm_site) != length(object@ptm_type)) {
      problems <- c(problems, "PTM type and site differ in length!")
    }
    if (!all(object@ptm_type %in% c("phosphorylation", "methylation"))) {
      problems <- c(problems, "Invalid PTM types detected!")
    }
    # A validity function returns TRUE or a character vector of problems.
    if (length(problems) > 0) problems else TRUE
  }
)
# Constructor
# User-facing constructor for the S4 "protein" class.
#   sequence - the sequence; its length is derived with nchar()
#   ptm      - its first element is used as the PTM sites, its second
#              element as the PTM types
# Stops when the sequence is NA, when sites and types differ in length,
# or when a type is neither "phosphorylation" nor "methylation".
# Otherwise returns the object produced by the `.protein` generator.
protein <- function(sequence, ptm) {
  allowed_types <- c("phosphorylation", "methylation")
  n_residues <- nchar(sequence)
  sites <- ptm[[1]]
  kinds <- ptm[[2]]

  if (is.na(sequence)) {
    stop("Sequence is missing!", call. = FALSE)
  }
  if (length(sites) != length(kinds)) {
    stop("PTM type and site differ in length!", call. = FALSE)
  }
  if (!all(kinds %in% allowed_types)) {
    stop("Invalid PTM types detected!", call. = FALSE)
  }

  .protein(
    sequence = sequence,
    length = n_residues,
    ptm_site = sites,
    ptm_type = kinds
  )
}
protein
class and check whether it works.print
using S4 and check that it works.setMethod(
# Registers a method for the `print` generic with signature "protein".
# The method reads the four slots of `x` and prints them with
# print()/paste0(): a header, the sequence, its length, then a PTM section.
# NOTE(review): the function's value is that of the last print() call, not
# `x`; consider ending with invisible(x) — confirm intent.
"print", "protein",
function(x) {
# Pull the slot values out of the S4 object.
sequence <- x@sequence
len <- x@length
ptm_site <- x@ptm_site
ptm_type <- x@ptm_type
print("============== Protein ==============")
print(paste0("Sequence:", sequence))
print(paste0("Length: ", len, " aa."))
print("============== PTM section ==============")
# paste0() is vectorised, so these print one entry per site / type.
print(paste0("Site: ", ptm_site))
print(paste0("Type: ", ptm_type))
}
)
# NOTE(review): `my_prot` is not created anywhere in the visible section;
# it must exist before this call.
print(my_prot)
ext_protein
that extends protein
with 3 slots:
# S4 class "ext_protein" and its generator function `.ext_protein`.
# The class inherits from "protein" and declares four slots of its own:
#   prot             - a "protein" object
#   feature_type     - character
#   feature_position - numeric
#   feature_value    - character
# NOTE(review): the class both inherits from "protein" (via `contains`) and
# stores a "protein" in the `prot` slot, while the task text speaks of 3
# added slots — confirm whether `prot` is intended.
.ext_protein <- setClass(
  Class = "ext_protein",
  contains = "protein",
  slots = list(
    prot = "protein",
    feature_type = "character",
    feature_position = "numeric",
    feature_value = "character"
  )
)
# Create an "ext_protein" instance through its generator. Only the slots
# declared on the subclass itself are supplied here.
# NOTE(review): `my_prot` is not created anywhere in the visible section;
# it must exist before this call.
my_ext_prot <- .ext_protein(
prot = my_prot,
feature_type = "modification",
feature_position = 11,
feature_value = "absent"
)
# Inspect the new object: class attribute, internal type, and structure.
class(my_ext_prot)
typeof(my_ext_prot)
str(my_ext_prot)
R6
package,protein
class using R6 model and check that it works as expected:
require(R6)
# R6 implementation of the "protein" class.
# Public fields:
#   seq      - the sequence (a single character string once initialised)
#   length   - number of characters in `seq`, computed with nchar()
#   ptm_site - PTM sites, taken from the first element of `ptm`
#   ptm_type - PTM types, taken from the second element of `ptm`
protein <- R6Class(
  classname = "protein",
  public = list(
    seq = NA,
    length = NULL,
    ptm_site = NA,
    ptm_type = NA,
    # initialize(seq, ptm): checks the inputs, then fills the fields.
    #   seq - a single, non-NA character string
    #   ptm - an object with at least two elements: sites, then types
    # Stops when `seq` is not a single character string or is NA, when
    # `ptm` has fewer than two elements, when sites and types differ in
    # length, or when a type is neither "phosphorylation" nor
    # "methylation".
    initialize = function(seq = NA, ptm = NA) {
      # Check types before anything is derived from the inputs.
      stopifnot(is.character(seq), length(seq) == 1L)
      # Validate
      if (is.na(seq)) {
        stop("Sequence is missing!", call. = FALSE)
      }
      # Guard the [[1]] / [[2]] extraction below, so an omitted `ptm`
      # gives a readable error rather than "subscript out of bounds".
      if (length(ptm) < 2L) {
        stop("PTM must hold two elements: sites and types!", call. = FALSE)
      }
      site <- ptm[[1]]
      type <- ptm[[2]]
      if (length(site) != length(type)) {
        stop("PTM type and site differ in length!", call. = FALSE)
      }
      if (!all(type %in% c("phosphorylation", "methylation"))) {
        stop("Invalid PTM types detected!", call. = FALSE)
      }
      # Fill the fields only after every check has passed.
      self$seq <- seq
      self$length <- nchar(seq)
      self$ptm_site <- site
      self$ptm_type <- type
    }
  )
)
# Create an instance of the R6 `protein` class. `ptm` is a list whose first
# element holds the sites and whose second element holds the types.
my_new_prot <- protein$new(
seq = "ARNDKLLQWYTTARD", ptm =
list(
site = c(3, 5),
type = c("phosphorylation", "methylation")
)
)
# Show the structure of the resulting object.
str(my_new_prot)
Congratulations! You are familiar with S3, S4 and R6 object models by now!
R version 4.4.3 (2025-02-28)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 24.04.2 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: Etc/UTC
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] bsplus_0.1.5 R6_2.6.1 lubridate_1.9.4 forcats_1.0.0
[5] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.4 readr_2.1.5
[9] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.2 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] gtable_0.3.6 jsonlite_2.0.0 compiler_4.4.3 tidyselect_1.2.1
[5] scales_1.3.0 yaml_2.3.10 fastmap_1.2.0 generics_0.1.3
[9] knitr_1.50 htmlwidgets_1.6.4 munsell_0.5.1 pillar_1.10.2
[13] tzdb_0.5.0 rlang_1.1.6 stringi_1.8.7 xfun_0.52
[17] timechange_0.3.0 cli_3.6.5 withr_3.0.2 magrittr_2.0.3
[21] digest_0.6.37 grid_4.4.3 hms_1.1.3 lifecycle_1.0.4
[25] vctrs_0.6.5 evaluate_1.0.3 glue_1.8.0 codetools_0.2-20
[29] colorspace_2.1-1 rmarkdown_2.29 tools_4.4.3 pkgconfig_2.0.3
[33] htmltools_0.5.8.1