Supplementary tables for carotovoricin analysis

Author

Lakhansing Pardeshi

Published

July 20, 2025

Metadata for all the genomes in the current pangenome along with carotovoricin presence/absence information

geNomad prophage prediction result summary for 454 genomes

Prophage clustering based on syntenic Jaccard index

Carotovoricin cluster presence/absence data

Carotovoricin cluster homology groups and their functional categories

Homology group based haplotypes for the tail fiber locus

---
title: "Supplementary tables for carotovoricin analysis"
author: "Lakhansing Pardeshi"
date: "`r Sys.Date()`"
format:
  html:
    embed-resources: true
    df-print: paged
knitr:
  opts_chunk:
    fig.height: 7
---

```{r}
#| label: setup
#| echo: false
#| warning: false

suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(org.Pectobacterium.spp.pan.eg.db))
suppressPackageStartupMessages(library(DT))
suppressPackageStartupMessages(library(openxlsx))

# The workspace is intentionally NOT cleared here (no `rm(list = ls())`):
# a rendered document starts in a fresh session, and clearing would only
# destroy objects of someone running the chunks interactively.

# NOTE(review): this helper script is sourced from the `main` branch of a
# remote repository, so its content can change between renders; consider
# pinning the URL to a commit hash for reproducibility.
source("https://raw.githubusercontent.com/lakhanp1/omics_utils/main/RScripts/utils.R")
source("scripts/utils/config_functions.R")
source("scripts/utils/homology_groups.R")

################################################################################
set.seed(124)

# project configuration; prefix_config_paths() is defined in a sourced script
confs <- prefix_config_paths(
  conf = suppressWarnings(configr::read.config(file = "project_config.yaml")),
  dir = "."
)

pangenome <- confs$data$pangenomes$pectobacterium.v2$name
panConf <- confs$data$pangenomes[[pangenome]]
panOrgDb <- org.Pectobacterium.spp.pan.eg.db
```

```{r}
#| echo: false
#| warning: false

## write tables to excel file
wb <- openxlsx::createWorkbook()

#' Add one supplementary table to the Excel workbook
#'
#' Creates a worksheet named `Table_S<table_num>`, writes `description` to
#' it, writes `data` as a filterable data table whose header is on row 2,
#' and freezes the panes so that row 3 and column 2 are the first cells
#' that scroll.
#'
#' @param data Data frame to store in the worksheet.
#' @param wb Workbook created with `openxlsx::createWorkbook()`.
#' @param table_num Table number used to build the worksheet name.
#' @param description Caption text written above the table.
#' @return The workbook `wb`.
add_table_to_workbook <- function(
    data, wb, table_num, description = "## Table") {
  sheet_name <- paste0("Table_S", table_num)

  openxlsx::addWorksheet(wb, sheetName = sheet_name)

  openxlsx::writeData(
    wb = wb, sheet = sheet_name, x = description
  )

  # NA values are kept and written as the literal string "NA"
  openxlsx::writeDataTable(
    wb = wb, x = data, sheet = sheet_name,
    startCol = 1, startRow = 2,
    withFilter = TRUE, keepNA = TRUE, na.string = "NA"
  )

  openxlsx::freezePane(
    wb = wb, sheet = sheet_name, firstActiveRow = 3, firstActiveCol = 2
  )

  wb
}

#' Render a data frame as an interactive table in the HTML report
#'
#' Single place for the `DT::datatable()` settings shared by every
#' supplementary table; only the number of fixed left columns varies.
#'
#' @param data Data frame to display.
#' @param left_columns Number of left-most columns passed to the
#'   FixedColumns extension as `leftColumns`.
#' @return The widget returned by `DT::datatable()`; it is displayed when
#'   the call is evaluated at the top level of a chunk.
show_table <- function(data, left_columns = 2) {
  DT::datatable(
    data = data,
    rownames = FALSE,
    filter = "top",
    class = "compact hover",
    extensions = c(
      "KeyTable", "Scroller", "Select", "SearchBuilder", "FixedColumns"
    ),
    options = list(
      autoWidth = FALSE,
      dom = "Qlfrtip",
      scrollX = TRUE,
      fixedColumns = list(leftColumns = left_columns),
      keys = TRUE,
      scroller = TRUE,
      scrollY = 600
    ),
    selection = "none"
  )
}

# running table number; used by the inline `r table_counter` in each tab
# heading and incremented at the end of each table chunk
table_counter <- 1
```

::: {.panel-tabset}

## Table S`r table_counter`

Metadata for all the genomes in the current pangenome along with carotovoricin presence/absence information

```{r}
#| echo: false
#| column: screen-inset-right

# the explicit column list fixes both the set and the order of columns
metadata <- suppressMessages(readr::read_csv(panConf$files$metadata)) %>%
  dplyr::select(
    genomeId, SpeciesName, sampleId, strain, AssemblyAccession,
    geo_loc_country, collection_year, host, env_broad_scale,
    type_material, AssemblyName, BioprojectAccn, BioSampleAccn,
    length, N50, L50, n_contigs
  )

wb <- add_table_to_workbook(
  data = metadata, wb = wb, table_num = table_counter,
  description = paste0(
    "## Table S", table_counter,
    ": Metadata for genome assemblies used in the current pangenome"
  )
)

show_table(metadata, left_columns = 2)

table_counter <- table_counter + 1
```

## Table S`r table_counter`

geNomad prophage prediction result summary for 454 genomes

```{r}
#| echo: false
#| column: screen-inset-right

prophages <- suppressMessages(
  readr::read_tsv(confs$data$prophages$files$data)
) %>%
  dplyr::mutate(
    # "N" only when both coordinates are missing, otherwise "Y"
    integrated = dplyr::if_else(
      condition = is.na(start) & is.na(end),
      true = "N", false = "Y"
    )
  ) %>%
  dplyr::select(prophage_id, genomeId, everything()) %>%
  dplyr::relocate(integrated, .after = end) %>%
  dplyr::left_join(
    y = dplyr::select(metadata, genomeId, strain),
    by = "genomeId"
  ) %>%
  dplyr::relocate(strain, .after = SpeciesName)

wb <- add_table_to_workbook(
  data = prophages, wb = wb, table_num = table_counter,
  description = paste0(
    "## Table S", table_counter,
    ": geNomad prophage prediction result summary for 454 genomes"
  )
)

show_table(prophages, left_columns = 2)

table_counter <- table_counter + 1
```

## Table S`r table_counter`

Prophage clustering based on syntenic Jaccard index

```{r}
#| echo: false
#| column: screen-inset-right

phage_grps <- suppressMessages(
  readr::read_tsv(confs$analysis$prophages$files$clusters)
) %>%
  dplyr::select(-starts_with("nodepath.")) %>%
  dplyr::left_join(
    y = dplyr::select(metadata, genomeId, strain),
    by = "genomeId"
  ) %>%
  dplyr::relocate(strain, .after = SpeciesName)

wb <- add_table_to_workbook(
  data = phage_grps, wb = wb, table_num = table_counter,
  description = paste0(
    "## Table S", table_counter,
    ": Prophage clustering based on syntenic Jaccard index"
  )
)

show_table(phage_grps, left_columns = 2)

table_counter <- table_counter + 1
```

## Table S`r table_counter`

Carotovoricin cluster presence/absence data

```{r}
#| echo: false
#| column: screen-inset-right

ctvPav <- suppressMessages(
  readr::read_tsv(file = confs$analysis$ctv$data$files$ctv_pav)
) |>
  dplyr::rename(
    "n_homology_groups" = nHgs,
    "homology_group_signature" = hgs,
    "coordinates" = pos
  )

# the first sentence ends with ". " so that the two sentences of the
# caption are not glued together when concatenated
wb <- add_table_to_workbook(
  data = ctvPav, wb = wb, table_num = table_counter,
  description = paste0(
    "## Table S", table_counter,
    ": Carotovoricin presence/absence data for 454 genomes. ",
    "In case of carotovoricin cluster split onto multiple contigs, \"|\" separator is used for coordinates and homology_group_signature columns"
  )
)

show_table(ctvPav, left_columns = 2)

table_counter <- table_counter + 1
```

## Table S`r table_counter`

Carotovoricin cluster homology groups and their functional categories

```{r}
#| echo: false
#| column: screen-inset-right

ctvHgs <- suppressMessages(
  readr::read_tsv(file = confs$analysis$ctv$data$files$hg_broad_functions)
) |>
  dplyr::rename(homology_group = hgId)

wb <- add_table_to_workbook(
  data = ctvHgs, wb = wb, table_num = table_counter,
  description = paste0(
    "## Table S", table_counter,
    ": Carotovoricin cluster homology groups and their functional categories"
  )
)

show_table(ctvHgs, left_columns = 3)

table_counter <- table_counter + 1
```

## Table S`r table_counter`

Homology group based haplotypes for the tail fiber locus

```{r}
#| echo: false
#| column: screen-inset-right

tfl_haplotypes <- suppressMessages(
  readr::read_tsv(file = confs$analysis$ctv$tfl$files$hg_regions)
) |>
  # n = number of rows sharing the same haplotype
  dplyr::add_count(haplotype, name = "n") |>
  dplyr::rename(
    "n_homology_groups" = nHgs,
    "homology_group_signature" = hgs
  )

wb <- add_table_to_workbook(
  data = tfl_haplotypes, wb = wb, table_num = table_counter,
  description = paste0(
    "## Table S", table_counter,
    ": Carotovoricin tail fiber locus haplotypes"
  )
)

show_table(tfl_haplotypes, left_columns = 3)

table_counter <- table_counter + 1
```

:::

```{r}
#| echo: false

# save all tables to an Excel file; create the output directory first so
# that saving does not fail on a fresh checkout
out_dir <- "reports/manuscript_ctv"
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

openxlsx::saveWorkbook(
  wb = wb,
  file = file.path(out_dir, "ms_ctv_supplementary_data.xlsx"),
  overwrite = TRUE
)
```