---
title: "Supplementary tables for prophage analysis"
author: "Lakhansing Pardeshi"
date: "`r Sys.Date()`"
format:
  html:
    embed-resources: true
    df-print: paged
knitr:
  opts_chunk:
    fig.height: 7
---
```{r}
#| label: setup
#| echo: false
#| warning: false

# Setup chunk: attach packages, load project helper scripts, resolve the
# pangenome configuration and create the workbook that collects every
# supplementary table written by the chunks below.
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(org.Pectobacterium.spp.pan.eg.db))
suppressPackageStartupMessages(library(DT))
suppressPackageStartupMessages(library(openxlsx))

# The former `rm(list = ls())` call was removed: a rendered document already
# starts from a clean session, and when the chunk is run interactively it
# would silently wipe the user's workspace.

# NOTE(review): this helper script is sourced from the `main` branch of a
# remote repository, so rendering needs network access and the code can change
# between runs; consider pinning a commit or vendoring the file.
source("https://raw.githubusercontent.com/lakhanp1/omics_utils/main/RScripts/utils.R")
source("scripts/utils/config_functions.R")
source("scripts/utils/homology_groups.R")

################################################################################
set.seed(124)

# Read the project configuration and let `prefix_config_paths()` (project
# helper) process it with "." as the base directory.
confs <- prefix_config_paths(
  conf = suppressWarnings(configr::read.config(file = "project_config.yaml")),
  dir = "."
)

# Name and configuration entry of the pangenome used throughout the report.
pangenome <- confs$data$pangenomes$pectobacterium.v2$name
panConf <- confs$data$pangenomes[[pangenome]]
panOrgDb <- org.Pectobacterium.spp.pan.eg.db

## write tables to excel file
# One worksheet is added per table; `table_counter` numbers the tables and is
# incremented at the end of every table chunk.
wb <- openxlsx::createWorkbook()
table_counter <- 1
```
::: {.panel-tabset}
## Table S`r table_counter`
Metadata for all the genomes in the current pangenome
```{r}
#| echo: false
#| column: screen-inset-right

# Genome metadata table.
# Read the metadata file configured for the pangenome, drop the `nodepath.*`
# columns and `Genome`, and move the key descriptive columns to the front
# (renaming `virulence` and `virulence_pcr` on the way). `metadata` is reused
# by later chunks for joins and filtering.
metadata <- suppressMessages(readr::read_csv(panConf$files$metadata)) %>%
  dplyr::select(-starts_with("nodepath."), -Genome) %>%
  dplyr::select(
    genomeId, source, SpeciesName, strain, geo_loc_country, collection_year, host,
    blackleg_phenotype = virulence, blackleg_PCR = virulence_pcr,
    AssemblyAccession, BioSampleAccn, everything()
  )

# Excel export: title in row 1, data table from row 2.
currentSheet <- paste0("Table_S", table_counter)
openxlsx::addWorksheet(wb, sheetName = currentSheet)

# Title now uses the same "Table S<n>: <caption>" pattern as every other
# sheet (the ": " separator was missing) and the caption grammar is fixed.
openxlsx::writeData(
  wb = wb, sheet = currentSheet,
  x = paste0(
    "## Table S", table_counter,
    ": Metadata for all the genomes in the current pangenome"
  )
)

openxlsx::writeDataTable(
  wb = wb, x = metadata, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)

# Keep the title row, the header row and the first column in view.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2
)

# Interactive HTML version of the same table.
DT::datatable(
  data = metadata,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the numbering for the next table.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Pangenome statistics: Homology group counts for the genus pangenome and
individual species pangenome
```{r}
#| echo: false
#| column: screen-inset-right

# Pangenome statistics table.
# Read the per-species homology group statistics and give the two count
# columns more descriptive names.
hg_stats <- suppressMessages(
  readr::read_tsv(confs$analysis$homology_groups$files$spp_group_stats)
) %>%
  dplyr::rename(
    HGs_per_class = count,
    HGs_pangenome = nHgs
  )

# Excel export: title in row 1, data table from row 2.
currentSheet <- paste0("Table_S", table_counter)
openxlsx::addWorksheet(wb, sheetName = currentSheet)

# The two caption fragments are concatenated without a separator, so the
# first one must end with a space; previously the title read
# "...genus pangenomeand individual species pangenome".
openxlsx::writeData(
  wb = wb, sheet = currentSheet,
  x = paste0(
    "## Table S", table_counter,
    ": Pangenome statistics: Homology group counts for the genus pangenome ",
    "and individual species pangenome"
  )
)

openxlsx::writeDataTable(
  wb = wb, x = hg_stats, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)

# Keep the title row, the header row and the first column in view.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2
)

# Interactive HTML version of the same table.
DT::datatable(
  data = hg_stats,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the numbering for the next table.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
GO enrichment for core, accessory and unique homology groups with respect to
the pangenome background
```{r}
#| echo: false
#| column: screen-inset-right

# GO enrichment table.
# Keep only the rows whose SpeciesName is "pangenome" and drop the
# `SpeciesName` and `subpan_class` columns.
go_table <- suppressMessages(
  readr::read_tsv(confs$analysis$homology_groups$files$spp_group_go)
) %>%
  dplyr::filter(SpeciesName == "pangenome") %>%
  dplyr::select(-SpeciesName, -subpan_class)

# Excel export: title in row 1, data table from row 2.
currentSheet <- paste0("Table_S", table_counter)
sheet_title <- paste0(
  "## Table S", table_counter,
  ": GO enrichment for core, accessory and unique homology groups",
  " with respect to the pangenome background"
)

openxlsx::addWorksheet(wb, sheetName = currentSheet)
openxlsx::writeData(wb = wb, sheet = currentSheet, x = sheet_title)
openxlsx::writeDataTable(
  wb = wb, x = go_table, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)

# Keep the title row, the header row and the first column in view.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2
)

# Interactive HTML version of the same table.
DT::datatable(
  data = go_table,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the numbering for the next table.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
geNomad prophage prediction result summary for 454 genomes
```{r}
#| echo: false
#| column: screen-inset-right

# Prophage prediction summary table.
# `integrated` is "N" when both `start` and `end` are missing and "Y"
# otherwise. Strain and blackleg columns are pulled in from `metadata`
# (created in the Table S1 chunk) and placed right after `SpeciesName`.
genome_annotations <- dplyr::select(
  metadata, genomeId, strain, blackleg_phenotype, blackleg_PCR
)

prophages <- suppressMessages(
  readr::read_tsv(confs$data$prophages$files$data)
) %>%
  dplyr::mutate(
    integrated = dplyr::if_else(is.na(start) & is.na(end), "N", "Y")
  ) %>%
  dplyr::select(prophage_id, genomeId, everything()) %>%
  dplyr::relocate(integrated, .after = end) %>%
  dplyr::left_join(y = genome_annotations, by = "genomeId") %>%
  dplyr::relocate(
    strain, blackleg_phenotype, blackleg_PCR,
    .after = SpeciesName
  )

# Excel export: title in row 1, data table from row 2.
currentSheet <- paste0("Table_S", table_counter)
sheet_title <- paste0(
  "## Table S", table_counter,
  ": geNomad prophage prediction result summary for 454 genomes"
)

openxlsx::addWorksheet(wb, sheetName = currentSheet)
openxlsx::writeData(wb = wb, sheet = currentSheet, x = sheet_title)
openxlsx::writeDataTable(
  wb = wb, x = prophages, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)

# Keep the title row, the header row and the first column in view.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2
)

# Interactive HTML version of the same table.
DT::datatable(
  data = prophages,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the numbering for the next table.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Prophage statistics for individual genomes
```{r}
#| echo: false
#| column: screen-inset-right

# Per-genome prophage statistics table.
# Columns starting with "nodepath" are dropped; strain and blackleg columns
# are pulled in from `metadata` (created in the Table S1 chunk) and placed
# right after `SpeciesName`.
genome_annotations <- dplyr::select(
  metadata, genomeId, strain, blackleg_phenotype, blackleg_PCR
)

phage_stats_genomes <- suppressMessages(
  readr::read_tsv(confs$analysis$prophages$summary$files$prophage_stats_genome)
) %>%
  dplyr::select(-dplyr::starts_with("nodepath")) %>%
  dplyr::left_join(y = genome_annotations, by = "genomeId") %>%
  dplyr::relocate(
    strain, blackleg_phenotype, blackleg_PCR,
    .after = SpeciesName
  )

# Excel export: title in row 1, data table from row 2.
currentSheet <- paste0("Table_S", table_counter)
sheet_title <- paste0(
  "## Table S", table_counter,
  ": Prophage statistics for individual genomes"
)

openxlsx::addWorksheet(wb, sheetName = currentSheet)
openxlsx::writeData(wb = wb, sheet = currentSheet, x = sheet_title)
openxlsx::writeDataTable(
  wb = wb, x = phage_stats_genomes, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)

# Keep the title row, the header row and the first column in view.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2
)

# Interactive HTML version of the same table.
DT::datatable(
  data = phage_stats_genomes,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the numbering for the next table.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Species wise and pangenome wide prophage statistics
```{r}
#| echo: false
#| column: screen-inset-right

# Species-level and pangenome-level prophage statistics table.
# Columns are renamed to reader-friendly names; the remaining `phageRatio.*`
# columns (everything except the renamed total ratio) are dropped.
phage_stats <- suppressMessages(
  readr::read_tsv(confs$analysis$prophages$summary$files$prophage_stats_species)
) %>%
  dplyr::rename(
    n_phages = n_vir_sp,
    phages_per_genome = mean_vir_per_g,
    HG_core = core,
    HG_accessory = accessory,
    HG_unique = unique,
    HG_total = total,
    HG_phage = phage_nHgs.total,
    phage_HG_ratio = phageRatio.total,
    phage_HG_core = phage_nHgs.core,
    phage_HG_accessory = phage_nHgs.accessory,
    phage_HG_unique = phage_nHgs.unique
  ) %>%
  dplyr::select(-dplyr::starts_with("phageRatio."))

# Excel export: title in row 1, data table from row 2.
currentSheet <- paste0("Table_S", table_counter)
openxlsx::addWorksheet(wb, sheetName = currentSheet)

# Fixed the "Tabls" typo so the sheet title matches the other tables.
openxlsx::writeData(
  wb = wb, sheet = currentSheet,
  x = paste0(
    "## Table S", table_counter,
    ": Species wise and pangenome wide prophage statistics"
  )
)

openxlsx::writeDataTable(
  wb = wb, x = phage_stats, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)

# Keep the title row, the header row and the first column in view.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2
)

# Interactive HTML version of the same table.
DT::datatable(
  data = phage_stats,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the numbering for the next table.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Prophage clustering based on syntenic Jaccard index
```{r}
#| echo: false
#| column: screen-inset-right

# Prophage cluster table.
# `nodepath.*` columns are dropped; strain and blackleg columns are pulled in
# from `metadata` (created in the Table S1 chunk) and placed right after
# `SpeciesName`.
genome_annotations <- dplyr::select(
  metadata, genomeId, strain, blackleg_phenotype, blackleg_PCR
)

phage_grps <- suppressMessages(
  readr::read_tsv(confs$analysis$prophages$files$clusters)
) %>%
  dplyr::select(-starts_with("nodepath.")) %>%
  dplyr::left_join(y = genome_annotations, by = "genomeId") %>%
  dplyr::relocate(
    strain, blackleg_phenotype, blackleg_PCR,
    .after = SpeciesName
  )

# Excel export: title in row 1, data table from row 2.
currentSheet <- paste0("Table_S", table_counter)
sheet_title <- paste0(
  "## Table S", table_counter,
  ": Prophage clustering based on syntenic Jaccard index"
)

openxlsx::addWorksheet(wb, sheetName = currentSheet)
openxlsx::writeData(wb = wb, sheet = currentSheet, x = sheet_title)
openxlsx::writeDataTable(
  wb = wb, x = phage_grps, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)

# Keep the title row, the header row and the first column in view.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2
)

# Interactive HTML version of the same table.
DT::datatable(
  data = phage_grps,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the numbering for the next table.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Prophage homology group functional categories
```{r}
#| echo: false
#| column: screen-inset-right

# Prophage homology group functional categories table.
# `hgId` and `class` are renamed to `homology_group` and `pangenome_class`.
prophage_hg_ann <- suppressMessages(
  readr::read_tsv(confs$analysis$prophages$files$hg_broad_functions)
) %>%
  dplyr::rename(homology_group = hgId, pangenome_class = class)

# Excel export: title in row 1, data table from row 2.
currentSheet <- paste0("Table_S", table_counter)
sheet_title <- paste0(
  "## Table S", table_counter,
  ": Prophage homology group functional categories"
)

openxlsx::addWorksheet(wb, sheetName = currentSheet)
openxlsx::writeData(wb = wb, sheet = currentSheet, x = sheet_title)
openxlsx::writeDataTable(
  wb = wb, x = prophage_hg_ann, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)

# Keep the title row, the header row and the first column in view.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2
)

# Interactive HTML version of the same table; this one pins three left
# columns instead of the two used for most other tables.
DT::datatable(
  data = prophage_hg_ann,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 3),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the numbering for the next table.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Anti-phage defense systems from DefenseFinder
```{r}
#| echo: false
#| column: screen-inset-right

# Anti-phage defense systems table, exported exactly as read from file.
defense_systems <- suppressMessages(
  readr::read_tsv(confs$analysis$defense_systems$files$data)
)

# Excel export: title in row 1, data table from row 2.
currentSheet <- paste0("Table_S", table_counter)
sheet_title <- paste0(
  "## Table S", table_counter,
  ": Anti-phage defense systems identified by DefenseFinder"
)

openxlsx::addWorksheet(wb, sheetName = currentSheet)
openxlsx::writeData(wb = wb, sheet = currentSheet, x = sheet_title)
openxlsx::writeDataTable(
  wb = wb, x = defense_systems, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)

# Keep the title row, the header row and the first column in view.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2
)

# Interactive HTML version of the same table; this one pins three left
# columns instead of the two used for most other tables.
DT::datatable(
  data = defense_systems,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 3),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the numbering for the next table.
table_counter <- table_counter + 1
```
## Table S`r table_counter`
Inhouse sample information
```{r}
#| echo: false
#| column: screen-inset-right

# In-house samples table.
# Subset of `metadata` (created in the Table S1 chunk): rows whose `source`
# is not "NCBI", restricted to the identifier and accession columns.
inhouse_samples <- metadata %>%
  dplyr::filter(source != "NCBI") %>%
  dplyr::select(
    genomeId, source, sampleId,
    AssemblyAccession, BioSampleAccn, BioprojectAccn
  )

# Excel export: title in row 1, data table from row 2.
currentSheet <- paste0("Table_S", table_counter)
sheet_title <- paste0("## Table S", table_counter, ": Inhouse genome assemblies")

openxlsx::addWorksheet(wb, sheetName = currentSheet)
openxlsx::writeData(wb = wb, sheet = currentSheet, x = sheet_title)
openxlsx::writeDataTable(
  wb = wb, x = inhouse_samples, sheet = currentSheet,
  startCol = 1, startRow = 2, withFilter = TRUE,
  keepNA = TRUE, na.string = "NA"
)

# Keep the title row, the header row and the first column in view.
openxlsx::freezePane(
  wb = wb, sheet = currentSheet, firstActiveRow = 3, firstActiveCol = 2
)

# Interactive HTML version of the same table.
DT::datatable(
  data = inhouse_samples,
  rownames = FALSE,
  filter = "top",
  class = "compact hover",
  extensions = c("KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"),
  options = list(
    autoWidth = FALSE,
    dom = "Qlfrtip",
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2),
    keys = TRUE,
    scroller = TRUE,
    scrollY = 600
  ),
  selection = "none"
)

# Advance the numbering for the next table.
table_counter <- table_counter + 1
```
:::
```{r}
#| echo: false

# Save all tables to a single Excel file.
out_dir <- "reports/manuscript_prophages"

# Make sure the output directory exists so rendering also works on a fresh
# checkout; `showWarnings = FALSE` keeps this silent when it is already there.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

# Write the workbook built up by the table chunks, replacing any previous
# export at the same path.
openxlsx::saveWorkbook(
  wb = wb, overwrite = TRUE,
  file = file.path(out_dir, "ms_prophages_supplementary_data.xlsx")
)
```