diff --git a/NAMESPACE b/NAMESPACE index 74481263e..0cdf53c11 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -165,7 +165,7 @@ export(separate_factors) export(separate_hierarchy) export(setup_analysis) export(sim_lfq_data) -export(sim_lfq_data_config) +export(sim_lfq_data_peptide_config) export(spread_response_by_IsotopeLabel) export(squeezeVarRob) export(strategy_glm) diff --git a/R/simulate_LFQ_data.R b/R/simulate_LFQ_data.R index 9956caf3e..e3c4d8280 100644 --- a/R/simulate_LFQ_data.R +++ b/R/simulate_LFQ_data.R @@ -1,27 +1,22 @@ -#' simulate peptide level data with two groups +#' simulate protein level data with two groups #' @export #' @param Nprot number of porteins #' @param N group size #' @param fc D - down regulation N - matrix, U - regulation #' @param prop proportion of down (D), up (U) and not regulated (N) #' @examples -#' res <- sim_lfq_data() -#' sim_lfq_data(Nprot = 10) -#' -#' dd <- sim_lfq_data() -#' dd$abundance <- add_missing(dd$abundance) #' -#' sim_lfq_data_config() -#' sim_lfq_data_config(with_missing = FALSE) - +#' sim_lfq_data(Nprot = 10) +#' res <- sim_lfq_data(Nprot = 10, PEPTIDE = TRUE) sim_lfq_data <- function( Nprot = 20, N = 4, fc = list(A = c(D = -2, U = 2, N = 0), B = c(D = 1, U = -4)), prop = list(A = c(D = 10, U = 10), B = c(D = 5, U = 20)), mean_prot = 20, - sd = 1.2, - probability_of_success = 0.3 + sdlog = log(1.2), + probability_of_success = 0.3, + PEPTIDE = FALSE ) { proteins <- stringi::stri_rand_strings(Nprot, 6) @@ -32,7 +27,7 @@ sim_lfq_data <- function( prot <- data.frame( proteinID = proteins, nrPeptides = nrpeptides, - average_prot_abundance = rlnorm(Nprot,log(20),sdlog = log(sd)), + average_prot_abundance = rlnorm(Nprot,log(20),sdlog = sdlog), mean_Ctrl = 0, N_Ctrl = N, sd = 1 @@ -57,10 +52,16 @@ sim_lfq_data <- function( prot <- prot |> dplyr::mutate(!!groupMean := FC, !!groupSize := N) } - # add row for each protein - peptide_df <- prot |> tidyr::uncount( nrPeptides ) - # create peptide ids - peptide_df$peptideID <- stringi::stri_rand_strings(sum(prot$nrPeptides), 8) + if (PEPTIDE) { + + # add row for each protein + peptide_df <- prot |> tidyr::uncount( nrPeptides ) + # create peptide ids + peptide_df$peptideID <- stringi::stri_rand_strings(sum(prot$nrPeptides), 8) + } else { + peptide_df <- prot + } + # transform into long format peptide_df2 <- peptide_df |> tidyr::pivot_longer(cols = tidyselect::starts_with(c("mean", "N_")), @@ -68,21 +69,29 @@ sim_lfq_data <- function( peptide_df2 <- peptide_df2 |> tidyr::separate(group, c("what", "group")) peptide_df2 <- peptide_df2 |> tidyr::pivot_wider(names_from = "what", values_from = mean) - peptide_df2$avg_peptide_abd <- - with(peptide_df2, - rlnorm(nrow(peptide_df2), - meanlog = log(average_prot_abundance - mean), - sdlog = log(sd))) - sample_from_normal <- function(mean, sd, n) { rnorm(n, mean, sd) } - nrpep <- nrow(peptide_df2) sampled_data <- matrix(nrow = nrpep, ncol = N) - colnames(sampled_data) <- c("V1","V2","V3","V4") - for (i in seq_len(nrpep)) { - sampled_data[i,] <- sample_from_normal(peptide_df2$avg_peptide_abd[i], peptide_df2$sd[1], peptide_df2$N[i]) + colnames(sampled_data) <- paste0("V", 1:ncol(sampled_data)) + + peptide_df2$average_prot_abundance <- peptide_df2$average_prot_abundance - peptide_df2$mean + + if (PEPTIDE) { + peptide_df2$avg_peptide_abd <- + with(peptide_df2, + rlnorm(nrow(peptide_df2), + meanlog = log(average_prot_abundance), + sdlog = sdlog)) + for (i in seq_len(nrpep)) { + sampled_data[i,] <- sample_from_normal(peptide_df2$avg_peptide_abd[i], peptide_df2$sd[1], peptide_df2$N[i]) + } + + } else { + for (i in seq_len(nrpep)) { + sampled_data[i,] <- sample_from_normal(peptide_df2$average_prot_abundance[i], peptide_df2$sd[1], peptide_df2$N[i]) + } } x <- dplyr::bind_cols(peptide_df2,sampled_data) @@ -97,6 +106,7 @@ sim_lfq_data <- function( return(peptideAbundances) } + #' add missing values to x vector based on the values of x #' @export #' @param x vector of intensities @@ -122,8 +132,8 @@ add_missing <- function(x){ #' @export #' @examples #' -sim_lfq_data_config <- function(Nprot = 10, with_missing = TRUE){ - data <- sim_lfq_data(Nprot = Nprot) +sim_lfq_data_peptide_config <- function(Nprot = 10, with_missing = TRUE){ + data <- sim_lfq_data(Nprot = Nprot, PEPTIDE = TRUE) if (with_missing) { data$abundance <- add_missing(data$abundance) } @@ -141,3 +151,23 @@ sim_lfq_data_config <- function(Nprot = 10, with_missing = TRUE){ adata <- setup_analysis(data, config) return(list(data = adata, config = config)) } + +sim_lfq_data_protein_config <- function(Nprot = 10, with_missing = TRUE){ + data <- sim_lfq_data(Nprot = Nprot, PEPTIDE = FALSE) + if (with_missing) { + data$abundance <- add_missing(data$abundance) + } + data$isotopeLabel <- "light" + data$qValue <- 0 + + atable <- AnalysisTableAnnotation$new() + atable$sampleName = "sample" + atable$factors["group_"] = "group" + atable$hierarchy[["protein_Id"]] = "proteinID" + atable$set_response("abundance") + + config <- AnalysisConfiguration$new(atable) + adata <- setup_analysis(data, config) + return(list(data = adata, config = config)) +} + diff --git a/R/tidyMS_plotting.R b/R/tidyMS_plotting.R index 265f87c0b..5e8d703fa 100644 --- a/R/tidyMS_plotting.R +++ b/R/tidyMS_plotting.R @@ -207,19 +207,25 @@ plot_hierarchies_boxplot <- function(pdata, #' @keywords internal #' @examples #' -#' iostar <- prolfqua_data('data_ionstar')$filtered() -#' iostar$config <- old2new(iostar$config) -#' iostar$data <- iostar$data |> +#' #iostar <- prolfqua_data('data_ionstar')$filtered() +#' #iostar$config <- old2new(iostar$config) +#' #iostar$data <- iostar$data |> +#' # dplyr::filter(protein_Id %in% sample(protein_Id, 2)) +#' #unique(iostar$data$protein_Id) +#' +#' istar <- sim_lfq_data_config() +#' config <- istar$config +#' analysis <- istar$data +#' analysis <- analysis |> #' dplyr::filter(protein_Id %in% sample(protein_Id, 2)) -#' unique(iostar$data$protein_Id) #' -#' res <- plot_hierarchies_boxplot_df(iostar$data,iostar$config) +#' res <- plot_hierarchies_boxplot_df(analysis,config) #' res$boxplot[[1]] -#' res <- plot_hierarchies_boxplot_df(iostar$data,iostar$config,iostar$config$table$hierarchy_keys()[1]) +#' res <- plot_hierarchies_boxplot_df(analysis,config,config$table$hierarchy_keys()[1]) #' res$boxplot[[1]] -#' res <- plot_hierarchies_boxplot_df(iostar$data,iostar$config, -#' iostar$config$table$hierarchy_keys()[1], -#' facet_grid_on = iostar$config$table$hierarchy_keys()[2]) +#' res <- plot_hierarchies_boxplot_df(analysis,config, +#' config$table$hierarchy_keys()[1], +#' facet_grid_on = config$table$hierarchy_keys()[2]) #' res$boxplot[[1]] #' #' bb <- prolfqua_data('data_IonstarProtein_subsetNorm') @@ -266,8 +272,8 @@ plot_hierarchies_boxplot_df <- function(pdata, #' @family plotting #' @examples #' -#' istar <- prolfqua_data('data_ionstar')$filtered() -#' config <- old2new(istar$config$clone(deep=TRUE)) +#' istar <- sim_lfq_data_config() +#' config <- istar$config #' analysis <- istar$data #' #' pheat_map <- prolfqua::plot_heatmap_cor( analysis, config ) @@ -322,20 +328,21 @@ plot_heatmap_cor <- function(data, #' plot heatmap with annotations #' #' @export +#' @param na_fraction fraction of NA values per row +#' @param show_rownames if TRUE shows row names, default FALSE #' @keywords internal #' @family plotting #' @examples #' -#' istar <- prolfqua_data('data_ionstar')$filtered() -#' stopifnot(nrow(istar$data) == 25780) -#' config <- old2new(istar$config$clone(deep=TRUE)) +#' istar <- sim_lfq_data_config() +#' config <- istar$config #' analysis <- istar$data #' -#' plot.new() +#' #' p <- plot_heatmap(analysis, config) +#' stopifnot(class(p) == "pheatmap") #' p2 <- plot_heatmap(analysis, config, show_rownames = TRUE) -#' plot.new() -#' p2 +#' stopifnot(class(p) == "pheatmap") #' plot_heatmap <- function(data, config, @@ -394,17 +401,19 @@ plot_heatmap <- function(data, #' @family plotting #' @export #' @examples -#' bb <- prolfqua_data('data_IonstarProtein_subsetNorm') -#' new <- list(config = bb$config$clone( deep = TRUE), data = bb$data) -#' istar <- LFQData$new(new$data, new$config) -#' config <- old2new(istar$config$clone(deep=TRUE)) +#' +#' istar <- sim_lfq_data_config() +#' config <- istar$config #' analysis <- istar$data -#' dev.off() #' rs <- plot_raster(analysis, config, show_rownames=FALSE) -#' print(rs) -#' plot_raster(analysis[1,], config) -#' plot_raster(analysis, config, "var") -#' plot_raster(analysis, config, show_rownames = TRUE) +#' stopifnot(class(rs) == "pheatmap") +#' rs <- plot_raster(analysis[1,], config) +#' stopifnot(is.null(rs)) +#' rs <- plot_raster(analysis, config, "var") +#' stopifnot(class(rs) == "pheatmap") +#' rs <- plot_raster(analysis, config, show_rownames = TRUE) +#' stopifnot(class(rs) == "pheatmap") +#' plot_raster <- function(data, config, arrange = c("mean", "var"), diff --git a/man/plot_heatmap.Rd b/man/plot_heatmap.Rd index 53ce0ecb2..7ed3da627 100644 --- a/man/plot_heatmap.Rd +++ b/man/plot_heatmap.Rd @@ -6,21 +6,25 @@ \usage{ plot_heatmap(data, config, na_fraction = 0.4, show_rownames = FALSE, ...) } +\arguments{ +\item{na_fraction}{fraction of NA values per row} + +\item{show_rownames}{if TRUE shows row names, default FALSE} +} \description{ plot heatmap with annotations } \examples{ -istar <- prolfqua_data('data_ionstar')$filtered() -stopifnot(nrow(istar$data) == 25780) -config <- old2new(istar$config$clone(deep=TRUE)) +istar <- sim_lfq_data_config() +config <- istar$config analysis <- istar$data -plot.new() + p <- plot_heatmap(analysis, config) +stopifnot(class(p) == "pheatmap") p2 <- plot_heatmap(analysis, config, show_rownames = TRUE) -plot.new() -p2 +stopifnot(class(p) == "pheatmap") } \seealso{ diff --git a/man/plot_heatmap_cor.Rd b/man/plot_heatmap_cor.Rd index 54be18110..f54cef73d 100644 --- a/man/plot_heatmap_cor.Rd +++ b/man/plot_heatmap_cor.Rd @@ -17,8 +17,8 @@ plot correlation heatmap with annotations } \examples{ -istar <- prolfqua_data('data_ionstar')$filtered() -config <- old2new(istar$config$clone(deep=TRUE)) +istar <- sim_lfq_data_config() +config <- istar$config analysis <- istar$data pheat_map <- prolfqua::plot_heatmap_cor( analysis, config ) diff --git a/man/plot_hierarchies_boxplot_df.Rd b/man/plot_hierarchies_boxplot_df.Rd index 9359faed7..977196b70 100644 --- a/man/plot_hierarchies_boxplot_df.Rd +++ b/man/plot_hierarchies_boxplot_df.Rd @@ -25,19 +25,25 @@ generates peptide level plots for all Proteins } \examples{ - iostar <- prolfqua_data('data_ionstar')$filtered() - iostar$config <- old2new(iostar$config) - iostar$data <- iostar$data |> + #iostar <- prolfqua_data('data_ionstar')$filtered() + #iostar$config <- old2new(iostar$config) + #iostar$data <- iostar$data |> + # dplyr::filter(protein_Id \%in\% sample(protein_Id, 2)) + #unique(iostar$data$protein_Id) + + istar <- sim_lfq_data_config() + config <- istar$config + analysis <- istar$data + analysis <- analysis |> dplyr::filter(protein_Id \%in\% sample(protein_Id, 2)) - unique(iostar$data$protein_Id) - res <- plot_hierarchies_boxplot_df(iostar$data,iostar$config) + res <- plot_hierarchies_boxplot_df(analysis,config) res$boxplot[[1]] - res <- plot_hierarchies_boxplot_df(iostar$data,iostar$config,iostar$config$table$hierarchy_keys()[1]) + res <- plot_hierarchies_boxplot_df(analysis,config,config$table$hierarchy_keys()[1]) res$boxplot[[1]] - res <- plot_hierarchies_boxplot_df(iostar$data,iostar$config, - iostar$config$table$hierarchy_keys()[1], - facet_grid_on = iostar$config$table$hierarchy_keys()[2]) + res <- plot_hierarchies_boxplot_df(analysis,config, + config$table$hierarchy_keys()[1], + facet_grid_on = config$table$hierarchy_keys()[2]) res$boxplot[[1]] bb <- prolfqua_data('data_IonstarProtein_subsetNorm') diff --git a/man/plot_raster.Rd b/man/plot_raster.Rd index f901ac44c..2faf080f3 100644 --- a/man/plot_raster.Rd +++ b/man/plot_raster.Rd @@ -28,17 +28,19 @@ plot_raster( plot heatmap without any clustering (use to show NA's) } \examples{ - bb <- prolfqua_data('data_IonstarProtein_subsetNorm') - new <- list(config = bb$config$clone( deep = TRUE), data = bb$data) -istar <- LFQData$new(new$data, new$config) -config <- old2new(istar$config$clone(deep=TRUE)) + +istar <- sim_lfq_data_config() +config <- istar$config analysis <- istar$data -dev.off() rs <- plot_raster(analysis, config, show_rownames=FALSE) -print(rs) -plot_raster(analysis[1,], config) -plot_raster(analysis, config, "var") -plot_raster(analysis, config, show_rownames = TRUE) +stopifnot(class(rs) == "pheatmap") +rs <- plot_raster(analysis[1,], config) +stopifnot(is.null(rs)) +rs <- plot_raster(analysis, config, "var") +stopifnot(class(rs) == "pheatmap") +rs <- plot_raster(analysis, config, show_rownames = TRUE) +stopifnot(class(rs) == "pheatmap") + } \seealso{ Other plotting: diff --git a/man/sim_lfq_data.Rd b/man/sim_lfq_data.Rd index a8b99fcc4..52d328b52 100644 --- a/man/sim_lfq_data.Rd +++ b/man/sim_lfq_data.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/simulate_LFQ_data.R \name{sim_lfq_data} \alias{sim_lfq_data} -\title{simulate peptide level data with two groups} +\title{simulate protein level data with two groups} \usage{ sim_lfq_data( Nprot = 20, @@ -10,8 +10,9 @@ sim_lfq_data( fc = list(A = c(D = -2, U = 2, N = 0), B = c(D = 1, U = -4)), prop = list(A = c(D = 10, U = 10), B = c(D = 5, U = 20)), mean_prot = 20, - sd = 1.2, - probability_of_success = 0.3 + sdlog = log(1.2), + probability_of_success = 0.3, + PEPTIDE = FALSE ) } \arguments{ @@ -24,15 +25,10 @@ sim_lfq_data( \item{prop}{proportion of down (D), up (U) and not regulated (N)} } \description{ -simulate peptide level data with two groups +simulate protein level data with two groups } \examples{ -res <- sim_lfq_data() -sim_lfq_data(Nprot = 10) - -dd <- sim_lfq_data() -dd$abundance <- add_missing(dd$abundance) -sim_lfq_data_config() -sim_lfq_data_config(with_missing = FALSE) +sim_lfq_data(Nprot = 10) +res <- sim_lfq_data(Nprot = 10, PEPTIDE = TRUE) } diff --git a/man/sim_lfq_data_config.Rd b/man/sim_lfq_data_peptide_config.Rd similarity index 58% rename from man/sim_lfq_data_config.Rd rename to man/sim_lfq_data_peptide_config.Rd index e70cde728..e9b50444c 100644 --- a/man/sim_lfq_data_config.Rd +++ b/man/sim_lfq_data_peptide_config.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/simulate_LFQ_data.R -\name{sim_lfq_data_config} -\alias{sim_lfq_data_config} +\name{sim_lfq_data_peptide_config} +\alias{sim_lfq_data_peptide_config} \title{Simulate data with config} \usage{ -sim_lfq_data_config(Nprot = 10, with_missing = TRUE) +sim_lfq_data_peptide_config(Nprot = 10, with_missing = TRUE) } \description{ Simulate data with config