Display factR2 object data — factR-meta • factR2

A factRObject-class contains different types of data at the gene, transcript, alternative splicing (AS) and protein domain levels. The functions below are designed to display specific contents of a factRObject.

# S4 method for factR
granges(object, ..., set = NULL)

# S4 method for factR
activeSet(object)

# S4 method for factR
activeSet(object) <- value

# S4 method for factR
listSets(object)

# S4 method for factR
features(object, ..., set = NULL, show_more = FALSE)

# S4 method for factR
genes(object, ..., show_more = FALSE)

# S4 method for factR
gns(object, ..., show_more = FALSE)

# S4 method for factR
transcripts(object, ..., show_more = FALSE)

# S4 method for factR
txs(object, ..., show_more = FALSE)

# S4 method for factR
ase(object, ..., show_more = FALSE)

# S4 method for factR
domains(object, ...)

Arguments

object

factRObject

...

One or more features to display. Can be the following:

gene_id: ID of gene to display
gene_name: Name of gene to display
transcript_id: ID of transcript to display

set

Set metadata to display. Can be "gene", "transcript" or "AS". If NULL (the default), the currently active set (see `activeSet`) is used.

value

Character value of one of the following: "gene", "transcript" or "AS"

Value

`granges`: GenomicRanges object of selected features
`activeSet` and `listSets`: Character value/vector
All other functions: Tibble data frame containing metadata of selected features

Examples

### Load sample factRObject
data("factRsample")

## Prints out activeSet
activeSet(factRsample)
#> [1] "AS"

## Change activeSet
activeSet(factRsample) <- "transcript"

## Returns coordinates and metadata of features as a GenomicRanges object
granges(factRsample)   # from activeSet
#> GRanges object with 1149 ranges and 13 metadata columns:
#>          seqnames          ranges strand |               gene_id    source
#>             <Rle>       <IRanges>  <Rle> |           <character>  <factor>
#>      [1]    chr15 3180731-3180944      * |           MSTRG.14523 StringTie
#>      [2]    chr15 3180731-3180944      * |           MSTRG.14523 StringTie
#>      [3]    chr15 3268547-3277274      + | ENSMUSG00000064373.12 StringTie
#>      [4]    chr15 3268547-3268768      + | ENSMUSG00000064373.12 StringTie
#>      [5]    chr15 3274661-3274876      + | ENSMUSG00000064373.12 StringTie
#>      ...      ...             ...    ... .                   ...       ...
#>   [1145]    chr15 7305070-7398395      - | ENSMUSG00000042961.13 StringTie
#>   [1146]    chr15 7305070-7305219      - | ENSMUSG00000042961.13 StringTie
#>   [1147]    chr15 7316399-7316482      - | ENSMUSG00000042961.13 StringTie
#>   [1148]    chr15 7318241-7318350      - | ENSMUSG00000042961.13 StringTie
#>   [1149]    chr15 7397988-7398395      - | ENSMUSG00000042961.13 StringTie
#>                type   gene_name match_level     score     phase
#>            <factor> <character>   <numeric> <numeric> <integer>
#>      [1] transcript        <NA>           5      1000      <NA>
#>      [2] exon              <NA>           5      1000      <NA>
#>      [3] transcript     Selenop           1      1000      <NA>
#>      [4] exon           Selenop           1      1000      <NA>
#>      [5] exon           Selenop           1      1000      <NA>
#>      ...        ...         ...         ...       ...       ...
#>   [1145] transcript      Egflam           1      1000      <NA>
#>   [1146] exon            Egflam           1      1000      <NA>
#>   [1147] exon            Egflam           1      1000      <NA>
#>   [1148] exon            Egflam           1      1000      <NA>
#>   [1149] exon            Egflam           1      1000      <NA>
#>                 transcript_id exon_number           ref_gene_id old_gene_id
#>                   <character> <character>           <character> <character>
#>      [1]        MSTRG.14523.1        <NA>                  <NA> MSTRG.14523
#>      [2]        MSTRG.14523.1           1                  <NA> MSTRG.14523
#>      [3] ENSMUST00000160787.8        <NA> ENSMUSG00000064373.12 MSTRG.14525
#>      [4] ENSMUST00000160787.8           1 ENSMUSG00000064373.12 MSTRG.14525
#>      [5] ENSMUST00000160787.8           2 ENSMUSG00000064373.12 MSTRG.14525
#>      ...                  ...         ...                   ...         ...
#>   [1145] ENSMUST00000159726.1        <NA> ENSMUSG00000042961.13 MSTRG.14582
#>   [1146] ENSMUST00000159726.1           1 ENSMUSG00000042961.13 MSTRG.14582
#>   [1147] ENSMUST00000159726.1           2 ENSMUSG00000042961.13 MSTRG.14582
#>   [1148] ENSMUST00000159726.1           3 ENSMUSG00000042961.13 MSTRG.14582
#>   [1149] ENSMUST00000159726.1           4 ENSMUSG00000042961.13 MSTRG.14582
#>               AStype       AS_id
#>          <character> <character>
#>      [1]        <NA>        <NA>
#>      [2]        <NA>        <NA>
#>      [3]        <NA>        <NA>
#>      [4]        <NA>        <NA>
#>      [5]        <NA>        <NA>
#>      ...         ...         ...
#>   [1145]        <NA>        <NA>
#>   [1146]        <NA>        <NA>
#>   [1147]        <NA>        <NA>
#>   [1148]        <NA>        <NA>
#>   [1149]        <NA>        <NA>
#>   -------
#>   seqinfo: 1 sequence from an unspecified genome; no seqlengths
granges(factRsample, "Dab2")   # specific features
#> GRanges object with 142 ranges and 13 metadata columns:
#>         seqnames          ranges strand |               gene_id    source
#>            <Rle>       <IRanges>  <Rle> |           <character>  <factor>
#>     [1]    chr15 6299788-6435800      + | ENSMUSG00000022150.16 StringTie
#>     [2]    chr15 6299788-6299899      + | ENSMUSG00000022150.16 StringTie
#>     [3]    chr15 6416806-6416994      + | ENSMUSG00000022150.16 StringTie
#>     [4]    chr15 6418230-6418369      + | ENSMUSG00000022150.16 StringTie
#>     [5]    chr15 6419739-6419837      + | ENSMUSG00000022150.16 StringTie
#>     ...      ...             ...    ... .                   ...       ...
#>   [138]    chr15 6424635-6426667      + | ENSMUSG00000022150.16 StringTie
#>   [139]    chr15 6321926-6322138      * | ENSMUSG00000022150.16 StringTie
#>   [140]    chr15 6321926-6322138      * | ENSMUSG00000022150.16 StringTie
#>   [141]    chr15 6408725-6408926      * | ENSMUSG00000022150.16 StringTie
#>   [142]    chr15 6408725-6408926      * | ENSMUSG00000022150.16 StringTie
#>               type   gene_name match_level     score     phase
#>           <factor> <character>   <numeric> <numeric> <integer>
#>     [1] transcript        Dab2           1      1000      <NA>
#>     [2] exon              Dab2           1      1000      <NA>
#>     [3] exon              Dab2           1      1000      <NA>
#>     [4] exon              Dab2           1      1000      <NA>
#>     [5] exon              Dab2           1      1000      <NA>
#>     ...        ...         ...         ...       ...       ...
#>   [138] exon              Dab2           1      1000      <NA>
#>   [139] transcript        Dab2           4      1000      <NA>
#>   [140] exon              Dab2           4      1000      <NA>
#>   [141] transcript        Dab2           4      1000      <NA>
#>   [142] exon              Dab2           4      1000      <NA>
#>                transcript_id exon_number           ref_gene_id old_gene_id
#>                  <character> <character>           <character> <character>
#>     [1] ENSMUST00000159552.1        <NA> ENSMUSG00000022150.16 MSTRG.14562
#>     [2] ENSMUST00000159552.1           1 ENSMUSG00000022150.16 MSTRG.14562
#>     [3] ENSMUST00000159552.1           2 ENSMUSG00000022150.16 MSTRG.14562
#>     [4] ENSMUST00000159552.1           3 ENSMUSG00000022150.16 MSTRG.14562
#>     [5] ENSMUST00000159552.1           4 ENSMUSG00000022150.16 MSTRG.14562
#>     ...                  ...         ...                   ...         ...
#>   [138] ENSMUST00000161558.1           4 ENSMUSG00000022150.16 MSTRG.14562
#>   [139]        MSTRG.14564.1        <NA>                  <NA> MSTRG.14564
#>   [140]        MSTRG.14564.1           1                  <NA> MSTRG.14564
#>   [141]        MSTRG.14566.1        <NA>                  <NA> MSTRG.14566
#>   [142]        MSTRG.14566.1           1                  <NA> MSTRG.14566
#>              AStype       AS_id
#>         <character> <character>
#>     [1]        <NA>        <NA>
#>     [2]        <NA>        <NA>
#>     [3]        <NA>        <NA>
#>     [4]        <NA>        <NA>
#>     [5]        <NA>        <NA>
#>     ...         ...         ...
#>   [138]        <NA>        <NA>
#>   [139]        <NA>        <NA>
#>   [140]        <NA>        <NA>
#>   [141]        <NA>        <NA>
#>   [142]        <NA>        <NA>
#>   -------
#>   seqinfo: 1 sequence from an unspecified genome; no seqlengths
granges(factRsample, "Dab2", set = "gene")   # specific features from different Set
#> GRanges object with 1 range and 13 metadata columns:
#>       seqnames          ranges strand |               gene_id   source     type
#>          <Rle>       <IRanges>  <Rle> |           <character> <factor> <factor>
#>   [1]    chr15 6299788-6440712      + | ENSMUSG00000022150.16   factR2     gene
#>         gene_name match_level     score     phase transcript_id exon_number
#>       <character>   <numeric> <numeric> <integer>   <character> <character>
#>   [1]        Dab2           4        NA      <NA>          <NA>        <NA>
#>       ref_gene_id old_gene_id      AStype       AS_id
#>       <character> <character> <character> <character>
#>   [1]        <NA>        <NA>        <NA>        <NA>
#>   -------
#>   seqinfo: 1 sequence from an unspecified genome; no seqlengths

## Returns metadata of features
features(factRsample)   # from activeSet
#> ℹ Set `show_more to TRUE to show more info`
#> # A tibble: 154 × 8
#>    transcript_id         gene_id        gene_name strand width novel cds   nmd  
#>    <chr>                 <chr>          <chr>     <fct>  <int> <chr> <chr> <chr>
#>  1 ENSMUST00000022746.12 ENSMUSG000000… Osmr      -       5491 no    no    no   
#>  2 ENSMUST00000175862.7  ENSMUSG000000… Osmr      -       2837 no    no    no   
#>  3 ENSMUST00000176554.1  ENSMUSG000000… Osmr      -        908 no    no    no   
#>  4 ENSMUST00000176826.1  ENSMUSG000000… Osmr      -       4028 no    no    no   
#>  5 ENSMUST00000177263.1  ENSMUSG000000… Osmr      -       6160 no    no    no   
#>  6 ENSMUST00000177478.1  ENSMUSG000000… Osmr      -       1727 no    no    no   
#>  7 MSTRG.14573.1         ENSMUSG000000… Osmr      *        877 yes   no    no   
#>  8 ENSMUST00000090461.11 ENSMUSG000000… Fyb       +       2678 no    no    no   
#>  9 ENSMUST00000159604.1  ENSMUSG000000… Fyb       +        771 no    no    no   
#> 10 ENSMUST00000159714.1  ENSMUSG000000… Fyb       +       2170 no    no    no   
#> # ℹ 144 more rows
features(factRsample, "Dab2")   # specific features
#> ℹ Set `show_more to TRUE to show more info`
#> # A tibble: 16 × 8
#>    transcript_id         gene_id        gene_name strand width novel cds   nmd  
#>    <chr>                 <chr>          <chr>     <fct>  <int> <chr> <chr> <chr>
#>  1 ENSMUST00000078019.12 ENSMUSG000000… Dab2      +       3793 no    no    no   
#>  2 ENSMUST00000080880.11 ENSMUSG000000… Dab2      +       4670 no    no    no   
#>  3 ENSMUST00000110663.8  ENSMUSG000000… Dab2      +       3838 no    no    no   
#>  4 ENSMUST00000110664.8  ENSMUSG000000… Dab2      +       4457 no    no    no   
#>  5 ENSMUST00000159046.1  ENSMUSG000000… Dab2      +        438 no    no    no   
#>  6 ENSMUST00000159490.7  ENSMUSG000000… Dab2      +        444 no    no    no   
#>  7 ENSMUST00000159552.1  ENSMUSG000000… Dab2      +        922 no    no    no   
#>  8 ENSMUST00000160134.7  ENSMUSG000000… Dab2      +       1460 no    no    no   
#>  9 ENSMUST00000161040.7  ENSMUSG000000… Dab2      +       1184 no    no    no   
#> 10 ENSMUST00000161558.1  ENSMUSG000000… Dab2      +       2119 no    no    no   
#> 11 ENSMUST00000161812.7  ENSMUSG000000… Dab2      +       3774 no    no    no   
#> 12 ENSMUST00000162094.2  ENSMUSG000000… Dab2      +        737 no    no    no   
#> 13 ENSMUST00000162140.1  ENSMUSG000000… Dab2      +       1406 no    no    no   
#> 14 ENSMUST00000163082.1  ENSMUSG000000… Dab2      +       3839 no    no    no   
#> 15 MSTRG.14564.1         ENSMUSG000000… Dab2      *        213 yes   no    no   
#> 16 MSTRG.14566.1         ENSMUSG000000… Dab2      *        202 yes   no    no   
features(factRsample, "Dab2", set = "gene")   # specific features from different Set
#> ℹ Set `show_more to TRUE to show more info`
#> # A tibble: 1 × 5
#>   gene_id               gene_name strand  width match_level
#>   <chr>                 <chr>     <fct>   <int>       <dbl>
#> 1 ENSMUSG00000022150.16 Dab2      +      140925           4

### This is the same as:
genes(factRsample, "Dab2")
#> ℹ Set `show_more to TRUE to show more info`
#> # A tibble: 1 × 5
#>   gene_id               gene_name strand  width match_level
#>   <chr>                 <chr>     <fct>   <int>       <dbl>
#> 1 ENSMUSG00000022150.16 Dab2      +      140925           4


## To return protein domains, the domains first need to be predicted:
factRsample <- buildCDS(factRsample)
factRsample <- getAAsequence(factRsample)
factRsample <- predictDomains(factRsample, "Dab2")
#> ℹ Set `show_more to TRUE to show more info`
#> Warning: Skipped 6 non-coding transcripts

## Then, the domains of the selected gene can be printed as such:
domains(factRsample, "Dab2")
#> # A tibble: 8 × 6
#>   type   description    eval    begin   end transcript_id        
#>   <chr>  <chr>          <chr>   <dbl> <dbl> <chr>                
#> 1 DOMAIN PH domain-like 2.7e-42    33   180 ENSMUST00000078019.12
#> 2 DOMAIN PH domain-like 4.9e-42    33   180 ENSMUST00000080880.11
#> 3 DOMAIN PH domain-like 2.7e-42    33   180 ENSMUST00000110663.8 
#> 4 DOMAIN PH domain-like 4.7e-42    33   180 ENSMUST00000110664.8 
#> 5 DOMAIN PH domain-like 3.9e-43    33   180 ENSMUST00000160134.7 
#> 6 DOMAIN PH domain-like 3.8e-43    33   180 ENSMUST00000161040.7 
#> 7 DOMAIN PH domain-like 2.5e-42    33   180 ENSMUST00000161812.7 
#> 8 DOMAIN PH domain-like 2.6e-21    35   110 ENSMUST00000162094.2 

## All outputs can be piped, or assigned to a variable, and manipulated further using other functions:
ase(factRsample) %>% dplyr::filter(AStype == "CE")
#> ℹ Set `show_more to TRUE to show more info`
#> # A tibble: 37 × 7
#>    AS_id   gene_id               gene_name coord             AStype strand width
#>    <chr>   <chr>                 <chr>     <chr>             <fct>  <fct>  <int>
#>  1 AS00005 ENSMUSG00000064373.12 Selenop   chr15:3272755-32… CE     +         51
#>  2 AS00029 ENSMUSG00000022186.14 Oxct1     chr15:4059936-40… CE     +         61
#>  3 AS00030 ENSMUSG00000022186.14 Oxct1     chr15:4091168-40… CE     +        108
#>  4 AS00031 ENSMUSG00000022186.14 Oxct1     chr15:4092411-40… CE     +        115
#>  5 AS00033 ENSMUSG00000022186.14 Oxct1     chr15:4094015-40… CE     +         95
#>  6 AS00034 ENSMUSG00000022186.14 Oxct1     chr15:4096472-40… CE     +         49
#>  7 AS00035 ENSMUSG00000022186.14 Oxct1     chr15:4101127-41… CE     +         73
#>  8 AS00036 ENSMUSG00000022186.14 Oxct1     chr15:4101804-41… CE     +         76
#>  9 AS00037 ENSMUSG00000022186.14 Oxct1     chr15:4128834-41… CE     +         90
#> 10 AS00039 ENSMUSG00000022186.14 Oxct1     chr15:4142792-41… CE     +         81
#> # ℹ 27 more rows