Phenotype matrix services

In this example, we’re going to explore the capabilities of the phenotype matrix services using the gorr package.

Load packages

First load the gorr package. The tidyverse package is recommended in general, but not required for this example:

library(gorr)
library(magrittr) # pipe
library(tibble)

Connect to Genuity Science’s services.

First we’ll need to establish a connection to our API services. To do that we’ll need to call platform_connect and provide it with the relevant parameters pointing to the phenotype-catalog-service, i.e. api_key and project:

# Read the API key and project name from the environment so that no
# credentials are hard-coded in the script.
conn <- platform_connect(
  api_key = Sys.getenv("GOR_API_KEY"),
  project = Sys.getenv("GOR_API_PROJECT")
)
conn
#> ── GOR API service connection ──────────────────────────────────────────────────
#> • Service Root/s: https://platform.wuxinextcodedev.com/api/query, https://platform.wuxinextcodedev.com/api/phenotype-catalog, https://platform.wuxinextcodedev.com/queryserver, https://platform.wuxinextcodedev.com/workflow
#> • Project: ukbb_hg38
#> • API key issued at: 2022-05-18 10:18:54
#> • API key expires at: Never
#> • Access token issued at: 2022-06-16 15:56:01
#> • Access token expires at: 2022-06-17 15:56:01

If everything goes as planned, we’ll have a conn object to pass into subsequent functions.

List project phenotypes

Let’s start by listing the phenotypes available in the project that have “Cases” as one of their tags:

# Tag used to filter the project's phenotypes.
recipe_tag <- "Cases"
# Request at most 20 phenotypes matching the tag, then show the first three.
phenos <- get_phenotypes(conn, any_tags = recipe_tag, limit = 20)
phenos[1:3]
#> $phecode_case_180.3
#> ── Phenotype ───────────────────────────────────────────────────────────────────
#>  $name: phecode_case_180.3
#>  $description: Cervical intraepithelial neoplasia [CIN] [Cervical dysplasia]
#>  $result_type: SET
#>  $tag_list: Cases, Phecode, judge, pair
#>  $pn_count: 2081
#>  $query: 
#> 
#> $garpur_test_153
#> ── Phenotype ───────────────────────────────────────────────────────────────────
#>  $name: garpur_test_153
#>  $description: All results from <a href="https://ruv.is">ruv</a> random sample 200k plink 2.5 word soup long description for a phenotype catalog entry that we want to see what happens when it's really really long. Like super long. Like longer than usual.
#>  $result_type: CATEGORY
#>  $tag_list: Cases, garpur_test_tag_1, physical_measurements
#>  $pn_count: 200643
#>  $query: nor freezes/ukbb_wes200k_plink25/buckets.tsv 
#> | calc value if(random()>0.9,'HIGH',if(random()>0.2,'MOD','LOW')) |
#>  select PN,value
#> 
#> 
#> $phecode_case_255.21
#> ── Phenotype ───────────────────────────────────────────────────────────────────
#>  $name: phecode_case_255.21
#>  $description: Glucocorticoid deficiency
#>  $result_type: SET
#>  $tag_list: Cases, Phecode
#>  $pn_count: 439
#>  $query:

The results come back as a named list of phenotypes. Let’s pick five of them at random by name:

# Draw five phenotype names at random (no seed is set, so the selection
# differs between runs).
phenos_subset_names <- sample(names(phenos), size = 5)
phenos_subset_names
#> [1] "phecode_case_253.3"  "phecode_case_255.21" "phecode_case_255.11"
#> [4] "phecode_case_253.11" "phecode_case_180.3"

Initialize phenotype matrix

We initialize an empty phenotype matrix object by running get_phenotype_matrix, optionally passing the base variable.

# Create a phenotype matrix object; no arguments are passed here.
pheno_matrix <- get_phenotype_matrix()
# Check the class of the returned object.
class(pheno_matrix)
#> [1] "phenotype_matrix"
# Print the object to inspect its contents.
pheno_matrix
#> $base
#> NULL
#> 
#> $phenotypes
#> list()
#> 
#> attr(,"class")
#> [1] "phenotype_matrix"

Adding phenotype/s to the phenotype matrix

To add phenotype/s to the phenotype matrix we use either phemat_add_phenotype, for adding a single phenotype to the matrix, or phemat_add_phenotypes, for adding several at once. Note that these functions do not take the same input arguments:

* phemat_add_phenotype(name, phenotype_matrix, missing_value, label)
* phemat_add_phenotypes(names, phenotype_matrix, missing_value)

# Add all sampled phenotypes in one call, using "-99" as the missing value.
pheno_matrix <- phemat_add_phenotypes(
  phenos_subset_names,
  pheno_matrix,
  missing_value = "-99"
)

#> [1] "pheno_matrix class: phenotype_matrix"
#> [1] "pheno_matrix content: base, phenotypes"
#> [1] "pheno_matrix phenotypes: phecode_case_253.3, phecode_case_255.21, phecode_case_255.11, phecode_case_253.11, phecode_case_180.3"

Remove phenotype from matrix

# Pick the first phenotype currently held in the matrix as the one to drop.
rm_pheno <- names(pheno_matrix$phenotypes)[1]
paste("Removing phenotype:", rm_pheno)
#> [1] "Removing phenotype: phecode_case_253.3"
# Remove it and print the updated matrix.
pheno_matrix <- phemat_remove_phenotype(name = rm_pheno, pheno_matrix)
pheno_matrix
#> $base
#> NULL
#> 
#> $phenotypes
#> $phenotypes$phecode_case_255.21
#> $phenotypes$phecode_case_255.21$name
#> [1] "phecode_case_255.21"
#> 
#> $phenotypes$phecode_case_255.21$missing_value
#> [1] "-99"
#> 
#> $phenotypes$phecode_case_255.21$label
#> [1] "phecode_case_255.21"
#> 
#> 
#> $phenotypes$phecode_case_255.11
#> $phenotypes$phecode_case_255.11$name
#> [1] "phecode_case_255.11"
#> 
#> $phenotypes$phecode_case_255.11$missing_value
#> [1] "-99"
#> 
#> $phenotypes$phecode_case_255.11$label
#> [1] "phecode_case_255.11"
#> 
#> 
#> $phenotypes$phecode_case_253.11
#> $phenotypes$phecode_case_253.11$name
#> [1] "phecode_case_253.11"
#> 
#> $phenotypes$phecode_case_253.11$missing_value
#> [1] "-99"
#> 
#> $phenotypes$phecode_case_253.11$label
#> [1] "phecode_case_253.11"
#> 
#> 
#> $phenotypes$phecode_case_180.3
#> $phenotypes$phecode_case_180.3$name
#> [1] "phecode_case_180.3"
#> 
#> $phenotypes$phecode_case_180.3$missing_value
#> [1] "-99"
#> 
#> $phenotypes$phecode_case_180.3$label
#> [1] "phecode_case_180.3"
#> 
#> 
#> 
#> attr(,"class")
#> [1] "phenotype_matrix"

Get data

Lastly, let’s fetch the data for the phenotypes in our phenotype matrix from the project:

# Fetch the data described by the phenotype matrix over the open connection
# and print the result.
pheno_data <- get_data(pheno_matrix, conn)
print(pheno_data)
#> # A tibble: 2,692 × 5
#>         pn phecode_case_255.… phecode_case_25… phecode_case_25… phecode_case_18…
#>      <dbl>              <dbl>            <dbl>            <dbl>            <dbl>
#>  1 1002261                -99              -99              -99                1
#>  2 1005939                -99              -99              -99                1
#>  3 1006278                  1              -99              -99              -99
#>  4 1010500                -99              -99              -99                1
#>  5 1011704                -99              -99              -99                1
#>  6 1012415                  1              -99              -99              -99
#>  7 1012521                -99              -99              -99                1
#>  8 1012811                -99              -99              -99                1
#>  9 1018263                  1              -99              -99              -99
#> 10 1022873                -99              -99              -99                1
#> # … with 2,682 more rows

Andri M. Stefansson andri@genuitysci.com