Skip to contents

Aggregate

Usage

aggregate(x, ...)

aggregateCols(x, ...)

aggregateRows(x, ...)

# S4 method for Matrix
aggregate(x, by, fun = c("sum", "mean", "n"), MARGIN = 1L)

# S4 method for SummarizedExperiment
aggregate(x, col = "aggregate", fun = "sum", MARGIN = 1L)

# S4 method for matrix
aggregate(
  x,
  by,
  fun = c("sum", "mean", "median", "geometricMean", "n"),
  MARGIN = 1L
)

# S4 method for Matrix
aggregateCols(x, ...)

# S4 method for SummarizedExperiment
aggregateCols(x, ...)

# S4 method for matrix
aggregateCols(x, ...)

# S4 method for Matrix
aggregateRows(x, ...)

# S4 method for SummarizedExperiment
aggregateRows(x, ...)

# S4 method for matrix
aggregateRows(x, ...)

Arguments

x

Object.

by

factor. Aggregation groupings. The factor levels define the new, aggregated names, and the names of the factor define the original, unaggregated names.

fun

character(1). Name of the aggregation function to apply. Uses match.arg() internally.

MARGIN

integer(1-2). Dimension where the function will be applied. For a two-dimensional matrix: 1 indicates rows; 2 indicates columns; c(1, 2) indicates rows and columns.

col

character(1). Name of column in either rowData() or colData() that defines the desired aggregation groupings.

...

Additional arguments.

Value

Modified object.

Methods (by class)

  • aggregate(SummarizedExperiment): Arguments pass through to the matrix or Matrix method, depending on the class of the matrix stored in the requested assay.

Note

Updated 2021-09-13.

Methods (by class)

  • matrix, Matrix: Aggregate using a grouping factor.

  • SummarizedExperiment: Aggregate data slotted in assays() using an automatically generated grouping factor, which is obtained from a user-defined column (col argument) in either the rowData() or colData() of the object. Slot an aggregate column into rowData() for aggregateRows(), or into colData() for aggregateCols(). This method will define the groupings automatically, and perform the aggregation.

See also

Author

Michael Steinbaugh, Rory Kirchner

Examples

## Toy integer count matrix: 4 transcripts (rows) by 4 samples (columns).
## Every cell is 2, except for the diagonal, which is 0.
counts <- matrix(data = 2L, nrow = 4L, ncol = 4L)
diag(counts) <- 0L
rownames(counts) <- sprintf("transcript%d", seq_len(4L))
colnames(counts) <- sprintf(
    "sample%d_replicate%d",
    rep(seq_len(2L), each = 2L),
    rep(seq_len(2L), times = 2L)
)

## Row groupings: names are the original transcript identifiers, and levels
## are the gene names to aggregate into.
genes <- stats::setNames(
    object = factor(rep(c("gene1", "gene2"), each = 2L)),
    nm = rownames(counts)
)
print(genes)
#> transcript1 transcript2 transcript3 transcript4 
#>       gene1       gene1       gene2       gene2 
#> Levels: gene1 gene2

## Column groupings: names are the original replicate-level column names, and
## levels are the sample names to aggregate into.
samples <- stats::setNames(
    object = factor(rep(c("sample1", "sample2"), each = 2L)),
    nm = colnames(counts)
)
print(samples)
#> sample1_replicate1 sample1_replicate2 sample2_replicate1 sample2_replicate2 
#>            sample1            sample1            sample2            sample2 
#> Levels: sample1 sample2

## matrix ====
## Dense base R matrix method. No `fun` is passed in these calls, so the first
## documented choice ("sum") applies, and each output cell is the sum of its
## group members.
object <- counts
print(object)
#>             sample1_replicate1 sample1_replicate2 sample2_replicate1
#> transcript1                  0                  2                  2
#> transcript2                  2                  0                  2
#> transcript3                  2                  2                  0
#> transcript4                  2                  2                  2
#>             sample2_replicate2
#> transcript1                  2
#> transcript2                  2
#> transcript3                  2
#> transcript4                  0
## MARGIN = 1L aggregates across rows: the four transcripts collapse into the
## two levels of `genes`.
aggregate(object, by = genes, MARGIN = 1L)
#>       sample1_replicate1 sample1_replicate2 sample2_replicate1
#> gene1                  2                  2                  4
#> gene2                  4                  4                  2
#>       sample2_replicate2
#> gene1                  4
#> gene2                  2
## aggregateRows() prints the same result as the MARGIN = 1L call above.
aggregateRows(object, by = genes)
#>       sample1_replicate1 sample1_replicate2 sample2_replicate1
#> gene1                  2                  2                  4
#> gene2                  4                  4                  2
#>       sample2_replicate2
#> gene1                  4
#> gene2                  2
## MARGIN = 2L aggregates across columns: the four replicate columns collapse
## into the two levels of `samples`.
aggregate(object, by = samples, MARGIN = 2L)
#>             sample1 sample2
#> transcript1       2       4
#> transcript2       2       4
#> transcript3       4       2
#> transcript4       4       2
## aggregateCols() prints the same result as the MARGIN = 2L call above.
aggregateCols(object, by = samples)
#>             sample1 sample2
#> transcript1       2       4
#> transcript2       2       4
#> transcript3       4       2
#> transcript4       4       2

## Matrix ====
## Sparse Matrix method. The coercion below yields a "dgCMatrix", in which
## zero entries print as ".".
## NOTE(review): presumably requires the Matrix package to be attached so the
## "sparseMatrix" class is available; confirm.
object <- as(counts, "sparseMatrix")
print(object)
#> 4 x 4 sparse Matrix of class "dgCMatrix"
#>             sample1_replicate1 sample1_replicate2 sample2_replicate1
#> transcript1                  .                  2                  2
#> transcript2                  2                  .                  2
#> transcript3                  2                  2                  .
#> transcript4                  2                  2                  2
#>             sample2_replicate2
#> transcript1                  2
#> transcript2                  2
#> transcript3                  2
#> transcript4                  .
## Row aggregation (transcripts to genes). The result is still a sparse
## "dgCMatrix", with the same values as the dense matrix example.
aggregate(object, by = genes, MARGIN = 1L)
#> 2 x 4 sparse Matrix of class "dgCMatrix"
#>       sample1_replicate1 sample1_replicate2 sample2_replicate1
#> gene1                  2                  2                  4
#> gene2                  4                  4                  2
#>       sample2_replicate2
#> gene1                  4
#> gene2                  2
## aggregateRows() prints the same result as the MARGIN = 1L call above.
aggregateRows(object, by = genes)
#> 2 x 4 sparse Matrix of class "dgCMatrix"
#>       sample1_replicate1 sample1_replicate2 sample2_replicate1
#> gene1                  2                  2                  4
#> gene2                  4                  4                  2
#>       sample2_replicate2
#> gene1                  4
#> gene2                  2
## Column aggregation (replicates to samples).
aggregate(object, by = samples, MARGIN = 2L)
#> 4 x 2 sparse Matrix of class "dgCMatrix"
#>             sample1 sample2
#> transcript1       2       4
#> transcript2       2       4
#> transcript3       4       2
#> transcript4       4       2
## aggregateCols() prints the same result as the MARGIN = 2L call above.
aggregateCols(object, by = samples)
#> 4 x 2 sparse Matrix of class "dgCMatrix"
#>             sample1 sample2
#> transcript1       2       4
#> transcript2       2       4
#> transcript3       4       2
#> transcript4       4       2

## SummarizedExperiment ====
## The grouping factors are stored in a column named "aggregate" in both
## rowData() and colData(). This matches the default `col = "aggregate"` of the
## SummarizedExperiment method, so no `by` or `col` argument is passed below.
object <- SummarizedExperiment::SummarizedExperiment(
    assays = S4Vectors::SimpleList(
        "counts" = counts
    ),
    rowData = S4Vectors::DataFrame(
        "aggregate" = genes
    ),
    colData = S4Vectors::DataFrame(
        "sampleName" = as.factor(names(samples)),
        "aggregate" = samples
    )
)
print(object)
#> class: SummarizedExperiment 
#> dim: 4 4 
#> metadata(0):
#> assays(1): counts
#> rownames(4): transcript1 transcript2 transcript3 transcript4
#> rowData names(1): aggregate
#> colnames(4): sample1_replicate1 sample1_replicate2 sample2_replicate1
#>   sample2_replicate2
#> colData names(2): sampleName aggregate
## Row aggregation: 4 transcripts become 2 genes. In the printed result,
## rowData() has no columns left, colData() is unchanged, and metadata() gains
## "aggregate" and "aggregateRows" entries.
aggregate(object, MARGIN = 1L)
#> class: SummarizedExperiment 
#> dim: 2 4 
#> metadata(2): aggregate aggregateRows
#> assays(1): counts
#> rownames(2): gene1 gene2
#> rowData names(0):
#> colnames(4): sample1_replicate1 sample1_replicate2 sample2_replicate1
#>   sample2_replicate2
#> colData names(2): sampleName aggregate
## aggregateRows() prints the same summary as the MARGIN = 1L call above.
aggregateRows(object)
#> class: SummarizedExperiment 
#> dim: 2 4 
#> metadata(2): aggregate aggregateRows
#> assays(1): counts
#> rownames(2): gene1 gene2
#> rowData names(0):
#> colnames(4): sample1_replicate1 sample1_replicate2 sample2_replicate1
#>   sample2_replicate2
#> colData names(2): sampleName aggregate
## Column aggregation: 4 replicate columns become 2 samples. In the printed
## result, colData() has no columns left, rowData() is unchanged, and
## metadata() gains "aggregate" and "aggregateCols" entries.
aggregate(object, MARGIN = 2L)
#> class: SummarizedExperiment 
#> dim: 4 2 
#> metadata(2): aggregate aggregateCols
#> assays(1): counts
#> rownames(4): transcript1 transcript2 transcript3 transcript4
#> rowData names(1): aggregate
#> colnames(2): sample1 sample2
#> colData names(0):
## aggregateCols() prints the same summary as the MARGIN = 2L call above.
aggregateCols(object)
#> class: SummarizedExperiment 
#> dim: 4 2 
#> metadata(2): aggregate aggregateCols
#> assays(1): counts
#> rownames(4): transcript1 transcript2 transcript3 transcript4
#> rowData names(1): aggregate
#> colnames(2): sample1 sample2
#> colData names(0):