Skip to content

Commit

Permalink
improve support for peptide counts
Browse files Browse the repository at this point in the history
  • Loading branch information
wolski committed Apr 6, 2024
1 parent 089b863 commit c1150db
Show file tree
Hide file tree
Showing 10 changed files with 131 additions and 81 deletions.
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ export(names_to_matrix)
export(normalize_log2_robscale)
export(nr_B_in_A)
export(nr_B_in_A_per_sample)
export(nr_obs)
export(nr_obs_hierarchy)
export(nr_obs_sample)
export(old2new)
export(pairs_smooth)
export(pairs_w_abline)
Expand Down
17 changes: 7 additions & 10 deletions R/ProteinAnnotation.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,17 @@
#' @family LFQData
#' @examples
#'
#' istar <- prolfqua_data('data_ionstar')$filtered()
#' istar$config <- old2new(istar$config)
#' data <- istar$data #|> dplyr::filter(protein_Id %in% sample(protein_Id, 100))
#' istar <-prolfqua::sim_lfq_data_peptide_config()
#' data <- istar$data
#'
#'
#' lfqdata <- LFQData$new(data, istar$config)
#' pannot <- ProteinAnnotation$new( lfqdata )
#'
#' pannot$annotate_decoys()
#' pannot$annotate_contaminants()
#'
#' stopifnot(pannot$nr_clean(contaminants = FALSE) == 163)
#' stopifnot(pannot$nr_clean(contaminants = TRUE) == 158)
#' stopifnot(nrow(pannot$clean(contaminants = FALSE)) == 163)
#' stopifnot(nrow(pannot$clean(contaminants = TRUE)) == 158)
#' dd <- pannot$clean()
#' tmp <- lfqdata$get_subset(dd)
#' stopifnot(tmp$hierarchy_counts()$protein_Id == 158)
#'
#'
ProteinAnnotation <-
R6::R6Class("ProteinAnnotation",
Expand Down Expand Up @@ -54,6 +49,8 @@ ProteinAnnotation <-
} else {
self$row_annot <- distinct(select(lfqdata$data, self$pID))
}


},
#' @description
#' annotate rev sequences
Expand Down
66 changes: 57 additions & 9 deletions R/tidyMS_R6_TransitionCorrelations.R
Original file line number Diff line number Diff line change
Expand Up @@ -645,18 +645,14 @@ nr_B_in_A <- function(pdata, config , merge = TRUE){
#' @keywords internal
#' @family summary
#' @examples
#' bb <- prolfqua::prolfqua_data('data_ionstar')$filtered()
#' bb$config <- old2new(bb$config)
#' stopifnot(nrow(bb$data) == 25780)
#' configur <- bb$config$clone(deep=TRUE)
#' data <- bb$data
#'
#' nr_B_in_A_per_sample(data, configur, nested =FALSE)
#' bb <- prolfqua::prolfqua_data('data_IonstarProtein_subsetNorm')
#' bb$config <- old2new(config = bb$config$clone( deep = TRUE))
#' debug(nr_B_in_A_per_sample)
#' bb <- prolfqua::sim_lfq_data_peptide_config()
#' nr_B_in_A_per_sample(bb$data, bb$config, nested =FALSE)
#' bb <- prolfqua::sim_lfq_data_protein_config()
#' nr_B_in_A_per_sample(bb$data, bb$config, nested=FALSE)
#'
nr_B_in_A_per_sample <- function(data, config, nested = TRUE){
#TODO wew check for deprecation since not used.
cf <- config

levelA <- cf$table$hierarchy_keys_depth()
Expand Down Expand Up @@ -687,6 +683,58 @@ nr_B_in_A_per_sample <- function(data, config, nested = TRUE){



#' Sum the number of child observations per hierarchy group and sample
#'
#' Rows of \code{data} containing any \code{NA} are dropped with
#' \code{na.omit()}. The remaining rows are grouped by the hierarchy keys
#' returned by \code{config$table$hierarchy_keys_depth()} together with the
#' column named by \code{config$table$fileName}, and the column named by
#' \code{config$table$nr_children} is summed within each group.
#'
#' @param data data.frame holding the hierarchy key columns, the file name
#'   column and the \code{config$table$nr_children} column.
#' @param config configuration object; only \code{config$table} is accessed.
#' @param new_child name of the output column holding the sums; defaults to
#'   \code{config$table$nr_children}.
#' @return an ungrouped table with one row per hierarchy group and file name.
#' @export
#' @examples
#' dd <- prolfqua::sim_lfq_data_peptide_config()
#' dd$data <- na.omit(dd$data)
#' xd <- nr_obs_sample(dd$data, dd$config)
#' xd
#' xd$nr_children |> table()
#'
#'
#' dp <- prolfqua::sim_lfq_data_protein_config()
#' xp <- nr_obs_sample(dp$data, dp$config)
#' xp$nr_peptides |> table()
#'
nr_obs_sample <- function(data, config, new_child = config$table$nr_children){
  complete_rows <- na.omit(data)
  tb <- config$table
  # one group per (hierarchy keys up to depth, file name) combination
  grouping <- rlang::syms(c(tb$hierarchy_keys_depth(), tb$fileName))
  count_col <- rlang::sym(tb$nr_children)
  dplyr::summarize(
    dplyr::group_by(complete_rows, !!!grouping),
    !!new_child := sum(!!count_col, na.rm = TRUE),
    .groups = "drop"
  )
}

#' Number of child observations per hierarchy group
#'
#' Produces one row per group defined by
#' \code{config$table$hierarchy_keys_depth()}.
#'
#' If \code{from_children} is \code{FALSE} and
#' \code{config$table$hierarchyDepth} is smaller than the number of hierarchy
#' keys, the distinct combinations of all hierarchy keys found in \code{data}
#' are counted per group. Otherwise the per-sample counts returned by
#' \code{nr_obs_sample()} are reduced to their maximum per group.
#'
#' @param data data.frame holding the hierarchy key columns (and, for the
#'   default branch, the columns required by \code{nr_obs_sample()}).
#' @param config configuration object; only \code{config$table} is accessed.
#' @param from_children if \code{TRUE} (default) derive the number from the
#'   \code{config$table$nr_children} column via \code{nr_obs_sample()}.
#' @param name_nr_child name of the output column holding the counts.
#' @return an ungrouped table with the hierarchy key column(s) and the column
#'   \code{name_nr_child}.
#' @export
#' @examples
#' dd <- prolfqua::sim_lfq_data_peptide_config()
#'
#' xd <- nr_obs_hierarchy(dd$data, dd$config)
#' xd
#' dp <- prolfqua::sim_lfq_data_protein_config()
#' nr_obs_sample(dp$data, dp$config)
#' xd <- nr_obs_hierarchy(dp$data, dp$config)
#'
#'
nr_obs_hierarchy <- function(data, config, from_children = TRUE , name_nr_child = "nr_child_exp"){
  tb <- config$table
  # syms() (not sym()) so that more than one key is handled, as in nr_obs_sample()
  depth_keys <- rlang::syms(tb$hierarchy_keys_depth())

  # scalar condition: use the short-circuit operator
  count_distinct <- !from_children &&
    (tb$hierarchyDepth < length(tb$hierarchy_keys()))

  if (count_distinct) {
    # select() lives in dplyr, not tidyr; all_of() makes the external
    # character vector of column names explicit
    res <- data |>
      dplyr::select(dplyr::all_of(tb$hierarchy_keys())) |>
      dplyr::distinct() |>
      dplyr::group_by(!!!depth_keys) |>
      dplyr::summarize(!!name_nr_child := dplyr::n(), .groups = "drop")
  } else {
    # nr_obs_sample() names its count column config$table$nr_children by default
    per_sample <- nr_obs_sample(data, config)
    res <- per_sample |>
      dplyr::group_by(!!!depth_keys) |>
      dplyr::summarize(
        !!name_nr_child := max(!!rlang::sym(tb$nr_children)),
        .groups = "drop"
      )
  }
  # return visibly instead of relying on the value of an assignment
  res
}


# Summarize Intensities by Intensity or NAs ----
.rankProteinPrecursors <- function(data,
config,
Expand Down
22 changes: 4 additions & 18 deletions R/tidyMS_aggregation.R
Original file line number Diff line number Diff line change
Expand Up @@ -620,28 +620,14 @@ estimate_intensity <- function(data, config, .func)
dplyr::ungroup()

new_child = paste0("nr_",config$table$hierarchy_keys_depth())
res_nr_children <- nr_obs(data, config, new_child = new_child)
res_nr_children <- nr_obs_sample(data, config, new_child = new_child)
unnested <- inner_join(unnested, res_nr_children, by = c(config$table$hierarchy_keys_depth(), config$table$fileName))
newconfig$table$nr_children = new_child
return(list(data = unnested, config = newconfig))
}

#' Sum the number of child observations per hierarchy group and sample
#'
#' Groups \code{data} by the hierarchy keys returned by
#' \code{config$table$hierarchy_keys_depth()} together with the column named
#' by \code{config$table$fileName}, and sums the column named by
#' \code{config$table$nr_children} within each group (ignoring \code{NA}).
#'
#' @param data data.frame holding the hierarchy key columns, the file name
#'   column and the \code{config$table$nr_children} column.
#' @param config configuration object; only \code{config$table} is accessed.
#' @param new_child name of the output column holding the sums; defaults to
#'   \code{config$table$nr_children}.
#' @return an ungrouped table with one row per hierarchy group and file name.
#' @export
#' @examples
#' dd <- prolfqua::sim_lfq_data_peptide_config()
#' dd$data <- na.omit(dd$data)
#' xd <- nr_obs(dd$data, dd$config)
#'
#' #xd |> head()
#'
#' xd$nr_children |> table()
nr_obs <- function(data, config, new_child = config$table$nr_children){
  group_cols <- rlang::syms(c(config$table$hierarchy_keys_depth(), config$table$fileName))
  # .groups = "drop" returns an ungrouped result and silences the
  # summarise() regrouping message
  data |>
    dplyr::group_by(!!!group_cols) |>
    dplyr::summarize(
      !!new_child := sum(!!rlang::sym(config$table$nr_children), na.rm = TRUE),
      .groups = "drop"
    )
}



#' Plot feature data and result of aggregation
#'
Expand Down Expand Up @@ -767,7 +753,7 @@ aggregate_intensity_topN <- function(pdata , config, .func, N = 3){
hierarchy = config$table$hierarchy[seq_len(config$table$hierarchyDepth)])

new_child_name <- paste0("nr_", config$table$hierarchy_keys_depth() )
res_nr_children <- nr_obs(pdata, config, new_child = new_child_name)
res_nr_children <- nr_obs_sample(pdata, config, new_child = new_child_name)
sumTopInt <- inner_join(
sumTopInt, res_nr_children,
by = c(config$table$fileName, config$table$hierarchy_keys_depth()))
Expand Down
15 changes: 5 additions & 10 deletions man/ProteinAnnotation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions man/get_imputed_contrasts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 4 additions & 9 deletions man/nr_B_in_A_per_sample.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 0 additions & 20 deletions man/nr_obs.Rd

This file was deleted.

28 changes: 28 additions & 0 deletions man/nr_obs_hierarchy.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions man/nr_obs_sample.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c1150db

Please sign in to comment.