updated: load functions, merge, subset and RunDPA/RunDCA to accept ne…

…w spatial metrics
PixelgenTechnologies · Sep 30, 2024 · 93c39ca · 93c39ca
1 parent 5ac59d1
commit 93c39ca
Show file tree

Hide file tree

Showing 8 changed files with 70 additions and 94 deletions.
diff --git a/R/CellGraphAssay.R b/R/CellGraphAssay.R
@@ -1185,13 +1185,7 @@ merge.MPXAssay <- function(
   }
 
   objects <- c(x, y)
-  cell.names <- unlist(lapply(objects, colnames))
-  name_conversion <- do.call(bind_rows, lapply(seq_along(objects), function(i) {
-    tibble(component = colnames(objects[[i]]), sample = i)
-  })) %>%
-    mutate(component_new = cell.names) %>%
-    group_by(sample) %>%
-    group_split()
+  cell_names <- unlist(lapply(objects, colnames))
 
   # Define add.cell.ids
   if (!is.null(add.cell.ids)) {
@@ -1207,7 +1201,7 @@ merge.MPXAssay <- function(
   }
 
   # Check duplicate cell names
-  unique_names <- table(cell.names)
+  unique_names <- table(cell_names)
   names_are_duplicated <- any(unique_names > 1)
   if (names_are_duplicated && is.null(add.cell.ids)) {
     abort(glue(
@@ -1230,44 +1224,27 @@ merge.MPXAssay <- function(
     merge.data = merge.data,
     ...
   )
+
+  # Join layers
   if (collapse && is(new_assay, "CellGraphAssay5")) {
     new_assay <- JoinLayers(new_assay)
   }
-
-  # Join layers
   if (collapse && is(new_assay, "Assay5")) {
     new_assay <- JoinLayers(new_assay)
   }
 
-  # Fetch cellgraphs
+  # Merge cellgraphs list
   cellgraphs_new <- Reduce(c, lapply(objects, function(cg_assay) {
     return(cg_assay@cellgraphs)
   })) %>% set_names(nm = colnames(new_assay))
 
-
-  # Merge polarization and colocalization scores
-  polarization <- do.call(bind_rows, lapply(seq_along(objects), function(i) {
-    pl <- slot(objects[[i]], name = "polarization")
-    if (length(pl) == 0) {
-      return(pl)
-    }
-    pl <- pl %>%
-      left_join(name_conversion[[i]], by = "component") %>%
-      select(-component, -sample) %>%
-      rename(component = component_new)
-    return(pl)
-  }))
-  colocalization <- do.call(bind_rows, lapply(seq_along(objects), function(i) {
-    cl <- slot(objects[[i]], name = "colocalization")
-    if (length(cl) == 0) {
-      return(cl)
-    }
-    cl <- cl %>%
-      left_join(name_conversion[[i]], by = "component") %>%
-      select(-component, -sample) %>%
-      rename(component = component_new)
-    return(cl)
-  }))
+  # Merge spatial metrics tables
+  polarization <- lapply(objects, function(object) {
+    slot(object, name = "polarization")
+  }) %>% bind_rows()
+  colocalization <- lapply(objects, function(object) {
+    slot(object, name = "colocalization")
+  }) %>% bind_rows()
 
   # Merge fs_map
   fs_map <- tibble()

diff --git a/R/differential_colocalization_analysis.R b/R/differential_colocalization_analysis.R
@@ -19,7 +19,7 @@ RunDCA.data.frame <- function(
   reference,
   targets = NULL,
   group_vars = NULL,
-  coloc_metric = c("pearson_z", "pearson"),
+  coloc_metric = "pearson_z",
   min_n_obs = 0,
   alternative = c("two.sided", "less", "greater"),
   conf_int = TRUE,
@@ -28,7 +28,6 @@ RunDCA.data.frame <- function(
   verbose = TRUE,
   ...
 ) {
-  coloc_metric <- match.arg(coloc_metric, choices = c("pearson_z", "pearson"))
 
   # Validate input parameters
   .validate_dpa_dca_input(
@@ -37,6 +36,7 @@ RunDCA.data.frame <- function(
     data_type = "colocalization"
   )
 
+  # Define targets as all groups except the reference if not specified
   targets <- targets %||% setdiff(unique(object[, contrast_column, drop = TRUE]), reference)
 
   # Check multiple choice args
@@ -286,7 +286,7 @@ RunDCA.Seurat <- function(
   targets = NULL,
   assay = NULL,
   group_vars = NULL,
-  coloc_metric = c("pearson_z", "pearson"),
+  coloc_metric = "pearson_z",
   min_n_obs = 0,
   alternative = c("two.sided", "less", "greater"),
   conf_int = TRUE,
@@ -300,7 +300,6 @@ RunDCA.Seurat <- function(
     "'contrast_column' must be available in Seurat object meta.data" =
       contrast_column %in% colnames(object[[]])
   )
-  coloc_metric <- match.arg(coloc_metric, choices = c("pearson_z", "pearson"))
 
   # Use default assay if assay = NULL
   if (!is.null(assay)) {
@@ -331,16 +330,6 @@ RunDCA.Seurat <- function(
   colocalization_data <- colocalization_data %>%
     left_join(y = group_data, by = "component")
 
-  # Remove redundant columns
-  colocalization_data <- colocalization_data %>%
-    select(-any_of(c(
-      "pearson_mean", "pearson_stdev",
-      "pearson_p_value", "pearson_p_value_adjusted",
-      "jaccard", "jaccard_mean", "jaccard_stdev",
-      "jaccard_z", "jaccard_p_value", "jaccard_p_value_adjusted",
-      setdiff(c("pearson_z", "pearson"), coloc_metric)
-    )))
-
   # Run DCA
   coloc_test_bind <- RunDCA(colocalization_data,
     targets = targets,

diff --git a/R/differential_polarity_analysis.R b/R/differential_polarity_analysis.R
@@ -41,7 +41,7 @@ RunDPA.data.frame <- function(
   reference,
   targets = NULL,
   group_vars = NULL,
-  polarity_metric = c("morans_z", "morans_i"),
+  polarity_metric = "morans_z",
   min_n_obs = 0,
   cl = NULL,
   alternative = c("two.sided", "less", "greater"),
@@ -50,7 +50,6 @@ RunDPA.data.frame <- function(
   verbose = TRUE,
   ...
 ) {
-  polarity_metric <- match.arg(polarity_metric, choices = c("morans_z", "morans_i"))
 
   # Validate input parameters
   .validate_dpa_dca_input(
@@ -59,6 +58,11 @@ RunDPA.data.frame <- function(
     data_type = "polarity"
   )
 
+  # Remove redundant columns
+  object <- object %>%
+    select(all_of(c("marker", "component", contrast_column, group_vars, polarity_metric)))
+
+  # Define targets as all groups except the reference if not specified
   targets <- targets %||% setdiff(unique(object[, contrast_column, drop = TRUE]), reference)
 
   # Check multiple choice args
@@ -280,7 +284,7 @@ RunDPA.Seurat <- function(
   targets = NULL,
   assay = NULL,
   group_vars = NULL,
-  polarity_metric = c("morans_z", "morans_i"),
+  polarity_metric = "morans_z",
   min_n_obs = 0,
   cl = NULL,
   alternative = c("two.sided", "less", "greater"),
@@ -294,7 +298,6 @@ RunDPA.Seurat <- function(
     "'contrast_column' must be available in Seurat object meta.data" =
       contrast_column %in% colnames(object[[]])
   )
-  polarity_metric <- match.arg(polarity_metric, choices = c("morans_z", "morans_i"))
 
   # Use default assay if assay = NULL
   if (!is.null(assay)) {
@@ -331,13 +334,6 @@ RunDPA.Seurat <- function(
   polarization_data <- polarization_data %>%
     left_join(y = group_data, by = "component")
 
-  # Remove redundant columns
-  polarization_data <- polarization_data %>%
-    select(-any_of(c(
-      "morans_p_value", "morans_p_adjusted",
-      setdiff(c("morans_z", "morans_i"), polarity_metric)
-    )))
-
   # Run DPA
   pol_test_bind <- RunDPA(polarization_data,
     targets = targets,

diff --git a/R/generics.R b/R/generics.R
@@ -347,8 +347,8 @@ edgelist_to_simple_Anode_graph <- function(
 #' \code{reference} will be compared to the \code{reference} group.
 #' @param reference The name of the reference group
 #' @param group_vars An optional character vector with column names to group the tests by.
-#' @param polarity_metric The polarity metric to use. Currently, you can select one of "morans_z" (default)
-#' or "morans_i".
+#' @param polarity_metric The polarity metric to use. Any numeric data column in the polarity score table
+#' can be selected. The default is "morans_z".
 #' @param min_n_obs Minimum number of observations allowed in a group. Target groups with less
 #' observations than \code{min_n_obs} will be skipped.
 #' @param alternative One of 'two.sided', 'less' or 'greater' (see \code{?wilcox.test} for details)
@@ -446,8 +446,8 @@ RunDPA <- function(
 #' \code{reference} will be compared to the \code{reference} group.
 #' @param reference The name of the reference group
 #' @param group_vars An optional character vector with column names to group the tests by.
-#' @param coloc_metric The colocalization metric to use. Currently, you can select one of "pearson_z" (default)
-#' or "pearson".
+#' @param coloc_metric The polarity metric to use. Any numeric data column in the colocalization score table
+#' can be selected. The default is "pearson_z".
 #' @param min_n_obs Minimum number of observations allowed in a group. Target groups with less
 #' observations than \code{min_n_obs} will be skipped.
 #' @param alternative One of 'two.sided', 'less' or 'greater' (see \code{?wilcox.test} for details)

diff --git a/R/load_data.R b/R/load_data.R
@@ -61,7 +61,11 @@ ReadMPX_counts <- function(
   # Extract contents
   X <- hd5_object[["X"]]$read()
   colnames(X) <- hd5_object[["obs"]][["component"]]$read()
-  rownames(X) <- hd5_object[["var"]][["marker"]]$read()
+  markers <- try({hd5_object[["var"]][["marker"]]$read()}, silent = TRUE)
+  if (inherits(markers, "try-error")) {
+    markers <- hd5_object[["var"]][["_index"]]$read()
+  }
+  rownames(X) <- markers
 
   X <- X[seq_len(nrow(X)), seq_len(ncol(X))]
 
@@ -181,11 +185,22 @@ ReadMPX_Seurat <- function(
     # Load polarity scores
     if (load_polarity_scores) {
       polarization <- ReadMPX_item(filename = filename, items = "polarization", verbose = FALSE)
+      if (inherits(polarization$component, "integer")) {
+        polarization$component <- as.character(polarization$component)
+      }
       cg_assay@polarization <- polarization
     }
     # Load colocalization scores
     if (load_colocalization_scores) {
       colocalization <- ReadMPX_item(filename = filename, items = "colocalization", verbose = FALSE)
+      if (inherits(colocalization$component, "integer")) {
+        colocalization$component <- as.character(colocalization$component)
+      }
+      # TODO: Remove this once the colocalization tables have been updated
+      if (all(c("marker1", "marker2") %in% names(colocalization))) {
+        colocalization <- colocalization %>%
+          rename(marker_1 = marker1, marker_2 = marker2)
+      }
       cg_assay@colocalization <- colocalization
     }
   } else {
@@ -455,6 +470,9 @@ ReadMPX_metadata <- function(
   meta_data <- jsonlite::read_json(temp_file, simplifyVector = TRUE) %>%
     as_tibble() %>%
     select(-contains("file_format_version"))
+  if (nrow(meta_data) == 0) {
+    abort("No metadata found in the PXL file")
+  }
   analysis <- meta_data$analysis[[1]]
   if (any(c("polarization", "colocalization") %in% names(analysis))) {
     analysis <- unlist(analysis %>% unname())

diff --git a/R/utils.R b/R/utils.R
@@ -93,14 +93,10 @@
   # Validate polarization and colocalization
   if (length(polarization) > 0) {
     # Check column names
-    stopifnot(
-      "'polarization' names are invalid" =
-        all(sort(names(polarization)) ==
-          sort(c(
-            "morans_i", "morans_p_value", "morans_p_adjusted",
-            "morans_z", "marker", "component"
-          )))
-    )
+    name_check <- all(c("marker", "component") %in% names(polarization))
+    if (!name_check) {
+      abort("Columns 'marker', and 'component' are required in the 'polarization' score table")
+    }
     # Check component names
     cells_in_polarization <- cell_ids %in% (polarization$component %>% unique())
     if (!all(cells_in_polarization)) {
@@ -148,17 +144,10 @@
 
   if (length(colocalization) > 0) {
     # Check column names
-    stopifnot(
-      "'colocalization' names are invalid" =
-        all(names(colocalization) ==
-          c(
-            "marker_1", "marker_2", "pearson", "pearson_mean",
-            "pearson_stdev", "pearson_z", "pearson_p_value",
-            "pearson_p_value_adjusted", "jaccard", "jaccard_mean",
-            "jaccard_stdev", "jaccard_z", "jaccard_p_value",
-            "jaccard_p_value_adjusted", "component"
-          ))
-    )
+    name_check <- all(c("marker_1", "marker_2", "component") %in% names(colocalization))
+    if (!name_check) {
+      abort("Columns 'marker_1', 'marker_2', and 'component' are required in the 'colocalization' score table")
+    }
     # Check component names
     cells_in_colocalization <- cell_ids %in% (colocalization$component %>% unique())
     if (!all(cells_in_colocalization)) {
@@ -425,13 +414,20 @@ abort_if_not <- function(
 
   # Validate spatial metric
   abort_if_not(
-    "{metric_name} = '{spatial_metric}' is invalid
-    {metric_name} must be present in the {data_type} score table" =
+    "{metric_name} = '{spatial_metric}' is missing from the {data_type} score table." =
       spatial_metric %in% colnames(object),
     "conf_int = '{conf_int}'
     must be TRUE or FALSE" =
       inherits(conf_int, what = "logical") & (length(conf_int) == 1)
   )
+  if (!inherits(object[, spatial_metric, drop = TRUE], what = "numeric")) {
+    abort(
+      glue(
+        "Column '{spatial_metric}' (polarity_metric) is a '{class(object[, spatial_metric, drop = TRUE])}' ",
+        "vector but must be a 'numeric' vector.\n"
+      )
+    )
+  }
 
   # Validate group_vars
   if (!is.null(group_vars)) {

diff --git a/man/RunDCA.Rd b/man/RunDCA.Rd
diff --git a/man/RunDPA.Rd b/man/RunDPA.Rd