From c1150db64e8663e78ba0e904bba4be92e74414f9 Mon Sep 17 00:00:00 2001 From: Witold Wolski Date: Sat, 6 Apr 2024 11:57:05 +0200 Subject: [PATCH] improve support for peptide counts --- NAMESPACE | 3 +- R/ProteinAnnotation.R | 17 +++---- R/tidyMS_R6_TransitionCorrelations.R | 66 ++++++++++++++++++++++++---- R/tidyMS_aggregation.R | 22 ++-------- man/ProteinAnnotation.Rd | 15 +++---- man/get_imputed_contrasts.Rd | 4 -- man/nr_B_in_A_per_sample.Rd | 13 ++---- man/nr_obs.Rd | 20 --------- man/nr_obs_hierarchy.Rd | 28 ++++++++++++ man/nr_obs_sample.Rd | 24 ++++++++++ 10 files changed, 131 insertions(+), 81 deletions(-) delete mode 100644 man/nr_obs.Rd create mode 100644 man/nr_obs_hierarchy.Rd create mode 100644 man/nr_obs_sample.Rd diff --git a/NAMESPACE b/NAMESPACE index d058e9bf3..c317e6891 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -102,7 +102,8 @@ export(names_to_matrix) export(normalize_log2_robscale) export(nr_B_in_A) export(nr_B_in_A_per_sample) -export(nr_obs) +export(nr_obs_hierarchy) +export(nr_obs_sample) export(old2new) export(pairs_smooth) export(pairs_w_abline) diff --git a/R/ProteinAnnotation.R b/R/ProteinAnnotation.R index 2058d94ee..237edac7a 100644 --- a/R/ProteinAnnotation.R +++ b/R/ProteinAnnotation.R @@ -5,22 +5,17 @@ #' @family LFQData #' @examples #' -#' istar <- prolfqua_data('data_ionstar')$filtered() -#' istar$config <- old2new(istar$config) -#' data <- istar$data #|> dplyr::filter(protein_Id %in% sample(protein_Id, 100)) +#' istar <-prolfqua::sim_lfq_data_peptide_config() +#' data <- istar$data +#' +#' #' lfqdata <- LFQData$new(data, istar$config) #' pannot <- ProteinAnnotation$new( lfqdata ) -#' #' pannot$annotate_decoys() #' pannot$annotate_contaminants() -#' -#' stopifnot(pannot$nr_clean(contaminants = FALSE) == 163) -#' stopifnot(pannot$nr_clean(contaminants = TRUE) == 158) -#' stopifnot(nrow(pannot$clean(contaminants = FALSE)) == 163) -#' stopifnot(nrow(pannot$clean(contaminants = TRUE)) == 158) #' dd <- pannot$clean() #' tmp <- 
lfqdata$get_subset(dd) -#' stopifnot(tmp$hierarchy_counts()$protein_Id == 158) +#' #' ProteinAnnotation <- R6::R6Class("ProteinAnnotation", @@ -54,6 +49,8 @@ ProteinAnnotation <- } else { self$row_annot <- distinct(select(lfqdata$data, self$pID)) } + + }, #' @description #' annotate rev sequences diff --git a/R/tidyMS_R6_TransitionCorrelations.R b/R/tidyMS_R6_TransitionCorrelations.R index 5efc36318..ceb3964dd 100644 --- a/R/tidyMS_R6_TransitionCorrelations.R +++ b/R/tidyMS_R6_TransitionCorrelations.R @@ -645,18 +645,14 @@ nr_B_in_A <- function(pdata, config , merge = TRUE){ #' @keywords internal #' @family summary #' @examples -#' bb <- prolfqua::prolfqua_data('data_ionstar')$filtered() -#' bb$config <- old2new(bb$config) -#' stopifnot(nrow(bb$data) == 25780) -#' configur <- bb$config$clone(deep=TRUE) -#' data <- bb$data -#' -#' nr_B_in_A_per_sample(data, configur, nested =FALSE) -#' bb <- prolfqua::prolfqua_data('data_IonstarProtein_subsetNorm') -#' bb$config <- old2new(config = bb$config$clone( deep = TRUE)) +#' debug(nr_B_in_A_per_sample) +#' bb <- prolfqua::sim_lfq_data_peptide_config() +#' nr_B_in_A_per_sample(bb$data, bb$config, nested =FALSE) +#' bb <- prolfqua::sim_lfq_data_protein_config() #' nr_B_in_A_per_sample(bb$data, bb$config, nested=FALSE) #' nr_B_in_A_per_sample <- function(data, config, nested = TRUE){ + #TODO wew check for deprecation since not used. cf <- config levelA <- cf$table$hierarchy_keys_depth() @@ -687,6 +683,58 @@ nr_B_in_A_per_sample <- function(data, config, nested = TRUE){ +#' Aggregates e.g. 
protein abundances from peptide abundances
+#' @param data data frame with the columns named by \code{config$table$hierarchy_keys_depth()}, \code{config$table$fileName} and \code{config$table$nr_children}; rows containing NA are dropped before summing.
+#' @param config configuration object whose \code{table} field provides \code{hierarchy_keys_depth()}, \code{fileName} and \code{nr_children}.
+#' @param new_child name of the output column holding the summed counts; defaults to \code{config$table$nr_children}.
+#' @return data frame with one row per hierarchy group and file, with the column \code{new_child} holding the sum of the \code{nr_children} column.
+#' @export
+#' @examples
+#' dd <- prolfqua::sim_lfq_data_peptide_config()
+#' dd$data <- na.omit(dd$data)
+#' xd <- nr_obs_sample(dd$data, dd$config)
+#' xd
+#' xd$nr_children |> table()
+#' dp <- prolfqua::sim_lfq_data_protein_config()
+#' xp <- nr_obs_sample(dp$data, dp$config)
+#' xp$nr_peptides |> table()
+nr_obs_sample <- function(data, config, new_child = config$table$nr_children){
+  data <- na.omit(data)
+  nr_children <- data |>
+    group_by(!!!rlang::syms(c(config$table$hierarchy_keys_depth(), config$table$fileName))) |>
+    summarize(!!new_child := sum(!!sym(config$table$nr_children), na.rm = TRUE), .groups = "drop")
+  return(nr_children)
+}
+
+#' Number of children per hierarchy group, e.g. peptides per protein
+#' @param data data frame with the hierarchy key columns, the file name column and the \code{nr_children} column named in \code{config$table}.
+#' @param config configuration object whose \code{table} field provides \code{hierarchy_keys()}, \code{hierarchy_keys_depth()}, \code{hierarchyDepth} and \code{nr_children}.
+#' @param from_children if TRUE (default), or if \code{hierarchyDepth} is not smaller than the number of hierarchy keys, take the maximum over files of the per-file counts from \code{nr_obs_sample}; otherwise count the distinct hierarchy key combinations per group.
+#' @param name_nr_child name of the output column holding the count.
+#' @return data frame with one row per hierarchy group (keys up to \code{hierarchyDepth}) and the column \code{name_nr_child}.
+#' @export
+#' @examples
+#' dd <- prolfqua::sim_lfq_data_peptide_config()
+#' xd <- nr_obs_hierarchy(dd$data, dd$config)
+#' xd
+#' dp <- prolfqua::sim_lfq_data_protein_config()
+#' nr_obs_sample(dp$data, dp$config)
+#' xd <- nr_obs_hierarchy(dp$data, dp$config)
+nr_obs_hierarchy <- function(data, config, from_children = TRUE , name_nr_child = "nr_child_exp"){
+  tb <- config$table
+  if (!from_children && (tb$hierarchyDepth < length(tb$hierarchy_keys())) ) { # scalar test, hence &&
+    data |> dplyr::select(dplyr::all_of(tb$hierarchy_keys())) |> # select lives in dplyr, not tidyr
+      dplyr::distinct() |>
+      dplyr::group_by(!!!rlang::syms(tb$hierarchy_keys_depth())) |> # syms: depth may span several keys
+      dplyr::summarize(!!name_nr_child := dplyr::n(), .groups = "drop")
+  } else {
+    xz <- nr_obs_sample(data, config) # per-file sums, stored under tb$nr_children
+    xz |> dplyr::group_by(!!!rlang::syms(tb$hierarchy_keys_depth())) |> # branch value is the visible result
+      dplyr::summarize(!!name_nr_child := max(!!rlang::sym(tb$nr_children)), .groups = "drop")
+  }
+}
+
+
 
 # Summarize Intensities by Intensity or NAs ----
 .rankProteinPrecursors <- function(data, config,
diff --git a/R/tidyMS_aggregation.R b/R/tidyMS_aggregation.R
index 9b45818bd..70c35d2ac 100644
--- a/R/tidyMS_aggregation.R
+++ b/R/tidyMS_aggregation.R
@@ -620,28 +620,14 @@ estimate_intensity <- function(data, config, .func)
dplyr::ungroup() new_child = paste0("nr_",config$table$hierarchy_keys_depth()) - res_nr_children <- nr_obs(data, config, new_child = new_child) + res_nr_children <- nr_obs_sample(data, config, new_child = new_child) unnested <- inner_join(unnested, res_nr_children, by = c(config$table$hierarchy_keys_depth(), config$table$fileName)) newconfig$table$nr_children = new_child return(list(data = unnested, config = newconfig)) } -#' Aggregates e.g. protein abundances from peptide abundances -#' -#' @export -#' @examples -#' dd <- prolfqua::sim_lfq_data_peptide_config() -#' dd$data <- na.omit(dd$data) -#' xd <- nr_obs(dd$data, dd$config) -#' -#' #xd |> head() -#' -#' xd$nr_children |> table() -nr_obs <- function(data, config, new_child = config$table$nr_children){ - nr_children <- data |> group_by(!!!rlang::syms(c(config$table$hierarchy_keys_depth(), config$table$fileName))) |> - summarize(!!new_child := sum(!!sym(config$table$nr_children), na.rm = TRUE)) - return(nr_children) -} + + #' Plot feature data and result of aggregation #' @@ -767,7 +753,7 @@ aggregate_intensity_topN <- function(pdata , config, .func, N = 3){ hierarchy = config$table$hierarchy[seq_len(config$table$hierarchyDepth)]) new_child_name <- paste0("nr_", config$table$hierarchy_keys_depth() ) - res_nr_children <- nr_obs(pdata, config, new_child = new_child_name) + res_nr_children <- nr_obs_sample(pdata, config, new_child = new_child_name) sumTopInt <- inner_join( sumTopInt, res_nr_children, by = c(config$table$fileName, config$table$hierarchy_keys_depth())) diff --git a/man/ProteinAnnotation.Rd b/man/ProteinAnnotation.Rd index 7f8fef2db..d0230aa22 100644 --- a/man/ProteinAnnotation.Rd +++ b/man/ProteinAnnotation.Rd @@ -10,22 +10,17 @@ Decorates LFQData with a row annotation and some protein specific functions. 
} \examples{ -istar <- prolfqua_data('data_ionstar')$filtered() -istar$config <- old2new(istar$config) -data <- istar$data #|> dplyr::filter(protein_Id \%in\% sample(protein_Id, 100)) +istar <-prolfqua::sim_lfq_data_peptide_config() +data <- istar$data + + lfqdata <- LFQData$new(data, istar$config) pannot <- ProteinAnnotation$new( lfqdata ) - pannot$annotate_decoys() pannot$annotate_contaminants() - -stopifnot(pannot$nr_clean(contaminants = FALSE) == 163) -stopifnot(pannot$nr_clean(contaminants = TRUE) == 158) -stopifnot(nrow(pannot$clean(contaminants = FALSE)) == 163) -stopifnot(nrow(pannot$clean(contaminants = TRUE)) == 158) dd <- pannot$clean() tmp <- lfqdata$get_subset(dd) -stopifnot(tmp$hierarchy_counts()$protein_Id == 158) + } \seealso{ diff --git a/man/get_imputed_contrasts.Rd b/man/get_imputed_contrasts.Rd index 7bce96d97..7a59ab4d5 100644 --- a/man/get_imputed_contrasts.Rd +++ b/man/get_imputed_contrasts.Rd @@ -29,13 +29,9 @@ istar <- sim_lfq_data_peptide_config() config <- istar$config analysis <- istar$data data <- complete_cases(analysis, config) -config$parameter$qVal_individual_threshold <- 0.01 -data <- prolfqua::remove_large_QValues(data, config) -data <- complete_cases(data, config) Contrasts <- c("dilution.b-a" = "group_A - group_B", "dilution.c-e" = "group_A - group_Ctrl") res <- get_imputed_contrasts(data, config, Contrasts) - config <- config contrasts <- Contrasts imputed <- missigness_impute_factors_interactions(data, config, value = "imputed" ) diff --git a/man/nr_B_in_A_per_sample.Rd b/man/nr_B_in_A_per_sample.Rd index e3b402704..0ec952046 100644 --- a/man/nr_B_in_A_per_sample.Rd +++ b/man/nr_B_in_A_per_sample.Rd @@ -10,15 +10,10 @@ nr_B_in_A_per_sample(data, config, nested = TRUE) how many peptides per protein in each sample } \examples{ -bb <- prolfqua::prolfqua_data('data_ionstar')$filtered() -bb$config <- old2new(bb$config) -stopifnot(nrow(bb$data) == 25780) -configur <- bb$config$clone(deep=TRUE) -data <- bb$data - 
-nr_B_in_A_per_sample(data, configur, nested =FALSE) -bb <- prolfqua::prolfqua_data('data_IonstarProtein_subsetNorm') -bb$config <- old2new(config = bb$config$clone( deep = TRUE)) +debug(nr_B_in_A_per_sample) +bb <- prolfqua::sim_lfq_data_peptide_config() +nr_B_in_A_per_sample(bb$data, bb$config, nested =FALSE) +bb <- prolfqua::sim_lfq_data_protein_config() nr_B_in_A_per_sample(bb$data, bb$config, nested=FALSE) } diff --git a/man/nr_obs.Rd b/man/nr_obs.Rd deleted file mode 100644 index bb23a8248..000000000 --- a/man/nr_obs.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tidyMS_aggregation.R -\name{nr_obs} -\alias{nr_obs} -\title{Aggregates e.g. protein abundances from peptide abundances} -\usage{ -nr_obs(data, config, new_child = config$table$nr_children) -} -\description{ -Aggregates e.g. protein abundances from peptide abundances -} -\examples{ -dd <- prolfqua::sim_lfq_data_peptide_config() -dd$data <- na.omit(dd$data) -xd <- nr_obs(dd$data, dd$config) - -#xd |> head() - -xd$nr_children |> table() -} diff --git a/man/nr_obs_hierarchy.Rd b/man/nr_obs_hierarchy.Rd new file mode 100644 index 000000000..78abcfc23 --- /dev/null +++ b/man/nr_obs_hierarchy.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidyMS_R6_TransitionCorrelations.R +\name{nr_obs_hierarchy} +\alias{nr_obs_hierarchy} +\title{Aggregates e.g. protein abundances from peptide abundances} +\usage{ +nr_obs_hierarchy( + data, + config, + from_children = TRUE, + name_nr_child = "nr_child_exp" +) +} +\description{ +Aggregates e.g. 
protein abundances from peptide abundances +} +\examples{ +dd <- prolfqua::sim_lfq_data_peptide_config() + +xd <- nr_obs_hierarchy(dd$data, dd$config) +xd +dp <- prolfqua::sim_lfq_data_protein_config() +debug(nr_obs_hierarchy) +nr_obs_sample(dp$data, dp$config) +xd <- nr_obs_hierarchy(dp$data, dp$config) + + +} diff --git a/man/nr_obs_sample.Rd b/man/nr_obs_sample.Rd new file mode 100644 index 000000000..f2af2dc1b --- /dev/null +++ b/man/nr_obs_sample.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidyMS_R6_TransitionCorrelations.R +\name{nr_obs_sample} +\alias{nr_obs_sample} +\title{Aggregates e.g. protein abundances from peptide abundances} +\usage{ +nr_obs_sample(data, config, new_child = config$table$nr_children) +} +\description{ +Aggregates e.g. protein abundances from peptide abundances +} +\examples{ +dd <- prolfqua::sim_lfq_data_peptide_config() +dd$data <- na.omit(dd$data) +xd <- nr_obs_sample(dd$data, dd$config) +xd +xd$nr_children |> table() + + +dp <- prolfqua::sim_lfq_data_protein_config() +xp <- nr_obs_sample(dp$data, dp$config) +xp$nr_peptides |> table() + +}