Phenotype services

In this example, we’re going to explore the capabilities of the phenotype-services using the gorr package.

Load packages

First, load the gorr package. The tidyverse is recommended in general, but it is not required for this example.

library(gorr)
library(magrittr) # pipe
library(tibble)

Connect to Genuity Science’s services.

First we’ll need to establish a connection to our API services. To do that we’ll need to call platform_connect and provide it with the relevant parameters pointing to the phenotype-catalog-service, i.e. api_key and project:

conn <- platform_connect(api_key = Sys.getenv("GOR_API_KEY"),
                          project = Sys.getenv("GOR_API_PROJECT"))
conn
#> ── GOR API service connection ──────────────────────────────────────────────────
#> • Service Root/s: https://platform.wuxinextcodedev.com/api/query, https://platform.wuxinextcodedev.com/api/phenotype-catalog, https://platform.wuxinextcodedev.com/queryserver, https://platform.wuxinextcodedev.com/workflow
#> • Project: ukbb_hg38
#> • API key issued at: 2022-05-18 10:18:54
#> • API key expires at: Never
#> • Access token issued at: 2022-06-16 15:56:13
#> • Access token expires at: 2022-06-17 15:56:13

If everything goes as planned, we’ll have a conn object to pass into subsequent functions.

List project phenotypes

Let’s start by fetching the available phenotypes in the project (the first 25).

phenotypes <- get_phenotypes(conn, limit=25)
print(phenotypes[1:4])
#> $rtest_pheno75
#> ── Phenotype ───────────────────────────────────────────────────────────────────
#>  $name: rtest_pheno75
#>  $description: This is a test phenotype
#>  $result_type: CATEGORY
#>  $tag_list: 
#>  $pn_count: 
#>  $query: 
#> 
#> $test_pheno100
#> ── Phenotype ───────────────────────────────────────────────────────────────────
#>  $name: test_pheno100
#>  $description: This is a test phenotype
#>  $result_type: QT
#>  $tag_list: 
#>  $pn_count: 
#>  $query: 
#> 
#> $test_pheno67
#> ── Phenotype ───────────────────────────────────────────────────────────────────
#>  $name: test_pheno67
#>  $description: This is a test phenotype
#>  $result_type: QT
#>  $tag_list: 
#>  $pn_count: 
#>  $query: 
#> 
#> $test_pheno99
#> ── Phenotype ───────────────────────────────────────────────────────────────────
#>  $name: test_pheno99
#>  $description: 
#>  $result_type: CATEGORY
#>  $tag_list: 
#>  $pn_count: 
#>  $query:

The results come back as a list of phenotypes

Get phenotype

Next, let’s fetch a phenotype from the project. We’ll use the first one listed.

phenotype <- get_phenotype(name = names(phenotypes)[1], conn)
phenotype
#> ── Phenotype ───────────────────────────────────────────────────────────────────
#>  $name: rtest_pheno75
#>  $description: This is a test phenotype
#>  $result_type: CATEGORY
#>  $tag_list: 
#>  $pn_count: 
#>  $query:

The results come back as a phenotype object, which is a list of lists containing different info on the phenotype object.

Create phenotype

We can add a new phenotype to the project using the create_phenotype function as follows

name <- paste0("rtest_pheno", sample(1:99,1)) # Name of new phenotype
result_type <- "CATEGORY" # Type of phenotype (either "QT", "SET" or "CATEGORY")
description <- "This is a test phenotype" # Optional phenotype description
new_phenotype <- create_phenotype(name, result_type, conn, description)
new_phenotype
#> ── Phenotype ───────────────────────────────────────────────────────────────────
#>  $name: rtest_pheno50
#>  $description: This is a test phenotype
#>  $result_type: CATEGORY
#>  $tag_list: 
#>  $pn_count: 
#>  $query:

A new phenotype has now been added to the project’s phenotype-catalog and assigned to the variable new_phenotype.

Phenotypes can also be created using the full set of metadata in a single call and with a GOR/NOR query.

##NOT RUN##
gor_query <- "
def #phenos# = added_salt_to_food;
create ##aggregate## = nor UKBB/phenotypes/fields.nord -s phenotype -f #phenos#
    | ATMAX visit_id -gc PN,phenotype
    | map -h -m '#empty#' -c phenotype,value <(nor -h UKBB/phenotypes/meta/field_encoding_lookup.tsv)
    | REPLACE value if(meaning = '#empty#',value,meaning)
    | hide meaning,visit_id,index_id;
nor [##aggregate##] | sort -c PN
    | pivot phenotype -ordered -gc PN -v #phenos# -e NA
    | rename (.*)_value #{1}
    | select PN
"

name2 <-  paste0("rtest_pheno", sample(1:99,1))
description2 <- "A set of individuals who answered YES to the question: do you salt your food? https://biobank.ndph.ox.ac.uk/ukb/field.cgi?id=104660"
result_type2 <- "SET"
category2 <- "symptoms"
query2 <- gor_query
tags2 <- "dietary,nutrition"

new_phenotype2 <- create_phenotype(name2, result_type2, conn, description2, query = query2, tags = tags2)
new_phenotype2
phenotype_delete(new_phenotype2, conn)

If the phenotype’s state is ‘pending’, or if we simply want the most recent information on the phenotype,
we can refresh it using the phenotype_refresh function.

new_phenotype <- phenotype_refresh(new_phenotype, conn)
#> Warning in deprecated_argument_msg(conn) %>% warning(): conn argument deprecated
new_phenotype
#> ── Phenotype ───────────────────────────────────────────────────────────────────
#>  $name: rtest_pheno50
#>  $description: This is a test phenotype
#>  $result_type: CATEGORY
#>  $tag_list: 
#>  $pn_count: 
#>  $query:

If the query fails, we can call either phenotype_get_error(phenotype, conn), which returns the latest error (formatted), or phenotype_get_errors(phenotype, conn), which returns all errors as lists along with their timestamps.

Update phenotype description

Passing a phenotype object and a new description to phenotype_update_description updates the phenotype’s description in the project’s phenotype-catalog. The output is, again, an updated phenotype object.

new_description <- "An updated description"
new_phenotype <- phenotype_update_description(new_description, new_phenotype, conn)
new_phenotype
#> ── Phenotype ───────────────────────────────────────────────────────────────────
#>  $name: rtest_pheno50
#>  $description: An updated description
#>  $result_type: CATEGORY
#>  $tag_list: 
#>  $pn_count: 
#>  $query:

Upload phenotype data

Let’s start by creating a dummy dataset, which is a list of lists: [[PN#1, attribute], [PN#2, attribute], … , [PN#N, attribute]]

cohort_size <- 2000
pns <- as.character(1000:cohort_size+1000)
data <- lapply(pns, function(x) { list(x, if (runif(1)>0.5) 'obease' else 'lean') })
print(data[1:2])
#> [[1]]
#> [[1]][[1]]
#> [1] "2000"
#> 
#> [[1]][[2]]
#> [1] "lean"
#> 
#> 
#> [[2]]
#> [[2]][[1]]
#> [1] "2001"
#> 
#> [[2]][[2]]
#> [1] "lean"

phenotype_upload_data(new_phenotype, data, conn)
#> Successfully uploaded phenotype data

Get phenotype data

Phenotype data can then be fetched using get_data:

get_data(new_phenotype, conn)
#> Warning in get_data.phenotype(new_phenotype, conn): phenotype structure provided
#> - conn argument ignored
#> # A tibble: 1,001 × 2
#>       pn rtest_pheno50
#>    <dbl> <chr>        
#>  1  2000 lean         
#>  2  2001 lean         
#>  3  2002 lean         
#>  4  2003 obease       
#>  5  2004 obease       
#>  6  2005 lean         
#>  7  2006 lean         
#>  8  2007 obease       
#>  9  2008 lean         
#> 10  2009 lean         
#> # … with 991 more rows

Get all tags available in the phenotype catalog

We can list all available tags in the catalog using get_tags and passing the platform_connection object.

get_tags(conn)[1:4]
#> [1] "testTag4"         "another-test-tag" "test-tag"         "Cases"

Get phenotype tags

Retrieve all tags for phenotype

phenotype_get_tags(new_phenotype)
#> NULL

Add phenotype tag/s

Add one or more new tags to this phenotype. Multiple tags can be given as a comma-separated string, "tag1,tag2", as a character vector, c("tag1", "tag2"), or as a combination of the two.

new_phenotype <- phenotype_add_tag(tag = "testTag2", new_phenotype, conn)
#> Warning: 'phenotype_add_tag' is deprecated.
#> Use 'phenotype_add_tags' instead.
#> See help("Deprecated")
phenotype_get_tags(new_phenotype)
#> [1] "testTag2"

For assurance, we can get the phenotype from the server and check its tags.

get_phenotype(name, conn) %>% phenotype_get_tags()
#> [1] "testTag2"

Set phenotype tag/s

Set the tag list for this phenotype, overriding all previous tags. The tags are specified following the same rules as for phenotype_add_tag.

tags <- "testTag7,testTag8"
new_phenotype <- phenotype_set_tags(tags, new_phenotype, conn)
phenotype_get_tags(new_phenotype)
#> [1] "testTag7" "testTag8"

Again, for assurance, let’s load the tags from the server.

get_phenotype(name, conn) %>% phenotype_get_tags()
#> [1] "testTag7" "testTag8"

Delete phenotype tag/s

Delete a tag/s from phenotype.

new_phenotype <- phenotype_delete_tag(tag = "testTag7", new_phenotype, conn)
#> Warning: 'phenotype_delete_tag' is deprecated.
#> Use 'phenotype_delete_tags' instead.
#> See help("Deprecated")
phenotype_get_tags(new_phenotype)
#> [1] "testTag8"
#or get_phenotype(name, conn) %>% phenotype_get_tags()

Delete phenotype

A phenotype can easily be removed from a project using the phenotype_delete function. phenotype_delete expects a phenotype object. Therefore the phenotype needs to be fetched using the get_phenotype function if it has not been assigned to a variable already.

phenotype_delete(new_phenotype, conn)
#> Warning in deprecated_argument_msg(conn) %>% warning(): conn argument deprecated
#> Response [https://platform.wuxinextcodedev.com/api/phenotype-catalog/projects/ukbb_hg38/phenotypes/rtest_pheno50]
#>   Date: 2022-06-16 15:56
#>   Status: 204
#>   Content-Type: <unknown>
#> <EMPTY BODY>

Andri M. Stefansson andri@genuitysci.com

2022-06-16