Skip to content

Commit

Permalink
Merge pull request #12 from renaissance-benchmarks/topic/plotting
Browse files Browse the repository at this point in the history
Support for website image generation
  • Loading branch information
ceresek authored Nov 16, 2023
2 parents 029bc7f + 4a5d89a commit f36b943
Show file tree
Hide file tree
Showing 69 changed files with 1,511 additions and 128 deletions.
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rren
Title: Renaissance Benchmark Data Processing Package
Version: 0.0.7
Version: 0.0.8
Authors@R:
person ("Petr", "Tuma", email = "[email protected]", role = c ("aut", "cre"), comment = c (ORCID = "0000-0002-7035-2322"))
Description: A utility package for processing the data produced by the Renaissance benchmark suite (https://renaissance.dev).
Expand All @@ -19,11 +19,14 @@ Imports:
cli (>= 3.6.1),
digest (>= 0.6.33),
dplyr (>= 1.1.3),
forcats (>= 1.0.0),
fs (>= 1.6.3),
ggplot2 (>= 3.4.4),
glue (>= 1.6.2),
jsonlite (>= 1.8.7),
logger (>= 0.2.2),
prettyunits (>= 1.2.0),
readr (>= 2.1.4),
rlang (>= 1.1.1),
stringr (>= 1.5.0),
tibble (>= 3.2.1),
Expand Down
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@
export(assert_renaissance)
export(check_renaissance)
export(compute_flat_percentile_ci)
export(compute_flat_relative_percentile_ci)
export(compute_hierarchical_percentile_ci)
export(compute_hierarchical_relative_percentile_ci)
export(compute_list_vectors_hierarchical_percentile_ci)
export(compute_list_vectors_pair_hierarchical_percentile_ci)
export(compute_trends)
export(compute_vector_flat_percentile_ci)
export(compute_vector_pair_flat_percentile_ci)
export(expect_renaissance)
export(flag_outliers_global)
export(flag_outliers_window)
Expand All @@ -21,6 +25,9 @@ export(list_segment_boundaries)
export(load_file_json)
export(load_path_json)
export(locate_vector_segments)
export(plot_default_labels)
export(plot_website_stripes)
export(plot_website_violins)
export(preserve_last_n)
export(preserve_last_sec)
export(remove_outliers_global)
Expand All @@ -31,6 +38,8 @@ export(rren_example)
import(checkmate)
import(cli)
import(dplyr)
import(forcats)
import(ggplot2)
import(glue)
import(logger)
import(prettyunits)
Expand Down
34 changes: 20 additions & 14 deletions R/example.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,39 @@ rren_example <- function (file_name = '') {

#' Return data frame with artificial data.
#'
#' @param data Data to use.
#' @param vm_count Count of VM combinations to generate.
#' @param run_count Count of run combinations to generate.
#' @param benchmark_count Count of benchmark combinations to generate.
#' @return data Frame with artificial data.
#' Data to use for each run is the same. The result tibble
#' is arranged to first iterate runs, then benchmarks,
#' then virtual machines. This guarantee is useful
#' when difference between runs is to be
#' injected later.
#'
#' @param data Data to use for each run.
#' @param run_count Count of run combinations to generate per virtual machine and benchmark.
#' @param benchmark_count Count of benchmark combinations to generate per virtual machine.
#' @param vm_count Count of virtual machine combinations to generate.
#' @return data Tibble with artificial data.
#' @export
rren_artificial <- function (data, vm_count = 1L, run_count = 1L, benchmark_count = 1L) {
rren_artificial <- function (data, run_count = 1L, benchmark_count = 1L, vm_count = 1L) {

result <- tibble (time = data) |>
mutate (
index = row_number (),
total = cumsum (data))

list_times_run <- lapply (seq (run_count), function (x) result |> mutate (run = factor (glue::glue ('Run {x}'))))
result_times_run <- bind_rows (list_times_run)

list_times_benchmark <- lapply (seq (benchmark_count), function (x) result_times_run |> mutate (benchmark = factor (glue::glue ('Benchmark {x}'))))
result_times_benchmark <- bind_rows (list_times_benchmark)

list_times_vm <- lapply (seq (vm_count),
function (x) result |> mutate (
function (x) result_times_benchmark |> mutate (
vm_name = factor (glue::glue ('Name {x}')),
vm_version = factor (glue::glue ('Version {x}')),
vm_configuration = factor (glue::glue ('Configuration {x}')),
vm = factor (digest::digest (c (.data $ vm_name, .data $ vm_version, .data $ vm_configuration), algo = 'murmur32'))))

result_times_vm <- bind_rows (list_times_vm)

list_times_run <- lapply (seq (run_count), function (x) result_times_vm |> mutate (run = factor (glue::glue ('Run {x}'))))
result_times_run <- bind_rows (list_times_run)

list_times_benchmark <- lapply (seq (benchmark_count), function (x) result_times_run |> mutate (benchmark = factor (glue::glue ('Benchmark {x}'))))
result_times_benchmark <- bind_rows (list_times_benchmark)

return (result_times_benchmark)
return (result_times_vm)
}
4 changes: 2 additions & 2 deletions R/filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ identify_vector_outliers_window <- function (.input, .window = 333, .limit = 0.0
limits_hi <- limits [2, ] + ranges * .slack

# Stretch limits across border samples.
limits_lo <- c (rep.int (first (limits_lo), floor ((.window - 1) / 2)), limits_lo, rep (last (limits_lo), ceiling ((.window - 1) / 2)))
limits_hi <- c (rep.int (first (limits_hi), floor ((.window - 1) / 2)), limits_hi, rep (last (limits_hi), ceiling ((.window - 1) / 2)))
limits_lo <- c (rep.int (first (limits_lo), floor ((.window - 1) / 2)), limits_lo, rep.int (last (limits_lo), ceiling ((.window - 1) / 2)))
limits_hi <- c (rep.int (first (limits_hi), floor ((.window - 1) / 2)), limits_hi, rep.int (last (limits_hi), ceiling ((.window - 1) / 2)))

return (.input < limits_lo | .input > limits_hi)
}
Expand Down
166 changes: 147 additions & 19 deletions R/interval.R
Original file line number Diff line number Diff line change
@@ -1,34 +1,42 @@
# ----------------------------------------------------------------
# Bootstrap

bootstrap_replicates_vector <- function (.input, .statistic, .R) {
# Sample one less than length to compensate variability bias.
# This may not make much sense for some statistics ?
length_reduced <- length (.input) - 1
bootstrap_replicates_vector <- function (.input, .statistic, .R, .reduction) {

# Reduction in sample size can help compensate bootstrap standard error bias.
length_original <- length (.input)
length_reduced <- length (.input) - .reduction

result <- rep.int (NA, .R)
# TODO This can be made (much) faster by blocking.
for (counter in seq.int (1, .R)) {
indices <- sample (length_reduced, replace = TRUE)
indices <- sample (length_original, length_reduced, replace = TRUE)
samples <- .input [indices]
result [counter] <- .statistic (samples)
}

return (result)
}


bootstrap_replicates_list_vectors <- function (.input, .statistic_outer, .statistic_inner, .R) {
# Sample one less than length to compensate variability bias.
# This may not make much sense for some statistics ?
length_reduced <- length (.input) - 1
if (length_reduced < 1) {
bootstrap_replicates_list_vectors <- function (.input, .statistic_outer, .statistic_inner, .R, .reduction) {

# Reduction in sample size can help compensate bootstrap standard error bias.
length_original <- length (.input)
length_reduced <- length (.input) - .reduction

if (length_original == 1) {

# Just one outer level, perform one level bootstrap.
result <- bootstrap_replicates_vector (.input [[1]], function (x) .statistic_outer (.statistic_inner (x)), .R)
# Note that this does not reduce sample size in inner level.
result <- bootstrap_replicates_vector (.input [[1]], function (x) .statistic_outer (.statistic_inner (x)), .R, 0)

} else {
# More outer levels, perform two level bootstrap.
result <- rep.int (NA, .R)
for (counter_result in seq.int (1, .R)) {
# TODO This can be made (much) faster by blocking.
indices_outer <- sample (length_reduced, replace = TRUE)
indices_outer <- sample (length_original, length_reduced, replace = TRUE)
samples_outer <- rep.int (NA, length_reduced)
for (counter_outer in seq.int (1, length_reduced)) {
index_outer <- indices_outer [counter_outer]
Expand All @@ -39,6 +47,7 @@ bootstrap_replicates_list_vectors <- function (.input, .statistic_outer, .statis
result [counter_result] <- .statistic_outer (samples_outer)
}
}

return (result)
}

Expand All @@ -52,11 +61,12 @@ bootstrap_replicates_list_vectors <- function (.input, .statistic_outer, .statis
#' @param .statistic Statistic to compute confidence interval for.
#' @param .confidence Confidence level.
#' @param .R Number of replicates.
#' @param .reduction Replicate size reduction.
#' @return Tibble with confidence interval.
#' @export
compute_vector_flat_percentile_ci <- function (.input, .statistic = mean, .confidence = 0.99, .R = 10000) {
compute_vector_flat_percentile_ci <- function (.input, .statistic = mean, .confidence = 0.99, .R = 10000, .reduction = 0) {

replicates <- bootstrap_replicates_vector (.input, .statistic, .R)
replicates <- bootstrap_replicates_vector (.input, .statistic, .R, .reduction)
interval <- stats::quantile (replicates, probs = c ((1 - .confidence)/2, (1 + .confidence)/2))
middle <- .statistic (.input)

Expand All @@ -71,18 +81,68 @@ compute_vector_flat_percentile_ci <- function (.input, .statistic = mean, .confi
#' @param .statistic_inner Statistic to apply at the inner level.
#' @param .confidence Confidence level.
#' @param .R Number of replicates.
#' @param .reduction Replicate size reduction.
#' @return Tibble with confidence interval.
#' @export
compute_list_vectors_hierarchical_percentile_ci <- function (.input, .statistic_outer = mean, .statistic_inner = mean, .confidence = 0.99, .R = 10000) {
compute_list_vectors_hierarchical_percentile_ci <- function (.input, .statistic_outer = mean, .statistic_inner = mean, .confidence = 0.99, .R = 10000, .reduction = 0) {

replicates <- bootstrap_replicates_list_vectors (.input, .statistic_outer, .statistic_inner, .R)
replicates <- bootstrap_replicates_list_vectors (.input, .statistic_outer, .statistic_inner, .R, .reduction)
interval <- stats::quantile (replicates, probs = c ((1 - .confidence)/2, (1 + .confidence)/2))
middle <- .statistic_outer (vapply (.input, .statistic_inner, numeric (1)))

tibble (lo = interval [1], hi = interval [2], mid = middle)
}


#' Compute flat percentile bootstrap confidence interval on data set pair.
#'
#' Each data set is bootstrapped independently and the replicate pairs are
#' merged with `.combination` before the percentile interval is taken.
#'
#' @param .input_one Data set one to compute with.
#' @param .input_two Data set two to compute with.
#' @param .combination Combination to compute for both data sets.
#' @param .statistic Statistic to compute for each data set.
#' @param .confidence Confidence level.
#' @param .R Number of replicates.
#' @param .reduction Replicate size reduction.
#' @return Tibble with confidence interval.
#' @export
compute_vector_pair_flat_percentile_ci <- function (.input_one, .input_two, .combination = .Primitive ('-'), .statistic = mean, .confidence = 0.99, .R = 10000, .reduction = 0) {

    # Combine the two replicate vectors elementwise into one distribution.
    combined_replicates <- .combination (
        bootstrap_replicates_vector (.input_one, .statistic, .R, .reduction),
        bootstrap_replicates_vector (.input_two, .statistic, .R, .reduction))

    # Symmetric tail probabilities for the requested confidence level.
    probabilities <- c ((1 - .confidence) / 2, (1 + .confidence) / 2)
    bounds <- stats::quantile (combined_replicates, probs = probabilities)

    # Point estimate combines the plain statistics of both data sets.
    point <- .combination (.statistic (.input_one), .statistic (.input_two))

    tibble (lo = bounds [1], hi = bounds [2], mid = point)
}


#' Compute hierarchical percentile bootstrap confidence interval on data set pair.
#'
#' Each data set (a list of vectors) is bootstrapped independently with the
#' two level resampling of [bootstrap_replicates_list_vectors()], then the
#' replicate pairs are merged with `.combination` and the percentile interval
#' is taken from the merged distribution.
#'
#' @param .input_one Data set one to compute with, as a list of vectors.
#' @param .input_two Data set two to compute with, as a list of vectors.
#' @param .combination Combination to compute for both data sets.
#' @param .statistic_outer Statistic to compute at the outer level.
#' @param .statistic_inner Statistic to compute at the inner level.
#' @param .confidence Confidence level.
#' @param .R Number of replicates.
#' @param .reduction Replicate size reduction.
#' @return Tibble with confidence interval.
#' @export
compute_list_vectors_pair_hierarchical_percentile_ci <- function (.input_one, .input_two, .combination = .Primitive ('-'), .statistic_outer = mean, .statistic_inner = mean, .confidence = 0.99, .R = 10000, .reduction = 0) {

# Bootstrap each data set separately, then merge the replicates pairwise.
replicates_one <- bootstrap_replicates_list_vectors (.input_one, .statistic_outer, .statistic_inner, .R, .reduction)
replicates_two <- bootstrap_replicates_list_vectors (.input_two, .statistic_outer, .statistic_inner, .R, .reduction)
replicates <- .combination (replicates_one, replicates_two)
# Symmetric percentile interval at the requested confidence level.
interval <- stats::quantile (replicates, probs = c ((1 - .confidence)/2, (1 + .confidence)/2))
# Point estimate: inner statistic per vector, outer statistic across vectors,
# combined across the two data sets.
middle <- .combination (
.statistic_outer (vapply (.input_one, .statistic_inner, numeric (1))),
.statistic_outer (vapply (.input_two, .statistic_inner, numeric (1))))

tibble (lo = interval [1], hi = interval [2], mid = middle)
}


#' Compute flat percentile bootstrap confidence interval.
#'
#' Uses [compute_vector_flat_percentile_ci()] to compute percentile confidence interval on data that is not structured into runs.
Expand All @@ -96,7 +156,7 @@ compute_flat_percentile_ci <- function (.input, .column, ...) {
assert_renaissance (.input, .check_index = FALSE, .check_total = FALSE, .check_metadata = FALSE)

.input |>
group_by (.data $ vm, .data $ benchmark) |>
group_by (.data $ vm, across (starts_with ('vm_')), .data $ benchmark) |>
reframe (compute_vector_flat_percentile_ci ({{ .column }}, ...))
}

Expand All @@ -107,13 +167,81 @@ compute_flat_percentile_ci <- function (.input, .column, ...) {
#'
#' @param .input Data structured into runs.
#' @param .column Column to compute confidence interval for.
#' @param ... Parameters to [compute_vector_flat_percentile_ci()].
#' @param ... Parameters to [compute_list_vectors_hierarchical_percentile_ci()].
#' @return Summarized tibble with confidence interval columns.
#' @export
compute_hierarchical_percentile_ci <- function (.input, .column, ...) {
assert_renaissance (.input, .check_index = FALSE, .check_total = FALSE, .check_metadata = FALSE)

.input |>
group_by (.data $ vm, .data $ benchmark) |>
group_by (.data $ vm, across (starts_with ('vm_')), .data $ benchmark) |>
reframe (compute_list_vectors_hierarchical_percentile_ci (split ({{ .column }}, .data $ run, drop = TRUE), ...))
}


#' Compute flat relative percentile bootstrap confidence interval.
#'
#' Uses [compute_vector_pair_flat_percentile_ci()] to compute percentile
#' confidence interval on data that is not structured into runs.
#'
#' The statistic is computed relative to baseline virtual machine.
#' Note that the baseline virtual machine itself is also included
#' in the output, compared against itself.
#'
#' @param .input Data not structured into runs.
#' @param .baseline Baseline virtual machine.
#' @param .column Column to compute confidence interval for.
#' @param ... Parameters to [compute_vector_pair_flat_percentile_ci()].
#' @return Summarized tibble with confidence interval columns.
#' @export
compute_flat_relative_percentile_ci <- function (.input, .baseline, .column, ...) {
assert_renaissance (.input, .check_index = FALSE, .check_total = FALSE, .check_metadata = FALSE)

# Subset baseline to reduce amount of filtering per benchmark.
baseline_for_vm <- .input |> filter (.data $ vm == .baseline)

# Return the baseline column values for one benchmark.
# The `.env` pronoun disambiguates the argument from the
# data column of the same name inside `filter`.
baseline_for_benchmark <- function (benchmark) {
force (benchmark)
baseline <- baseline_for_vm |> filter (.data $ benchmark == .env $ benchmark)
return (baseline |> pull ({{ .column }}))
}

# Group by vm hash plus the vm_* metadata columns so the metadata
# survives into the summarized output, then compute one interval
# per vm and benchmark against the matching baseline data.
.input |>
group_by (.data $ vm, across (starts_with ('vm_')), .data $ benchmark) |>
reframe (compute_vector_pair_flat_percentile_ci (
{{ .column }},
baseline_for_benchmark (cur_group () $ benchmark),
...))
}


#' Compute hierarchical relative percentile bootstrap confidence interval.
#'
#' Uses [compute_list_vectors_pair_hierarchical_percentile_ci()] to compute percentile
#' confidence interval on data that is structured into runs.
#'
#' The statistic is computed relative to baseline virtual machine.
#' Note that the baseline virtual machine itself is also included
#' in the output, compared against itself.
#'
#' @param .input Data structured into runs.
#' @param .baseline Baseline virtual machine.
#' @param .column Column to compute confidence interval for.
#' @param ... Parameters to [compute_list_vectors_pair_hierarchical_percentile_ci()].
#' @return Summarized tibble with confidence interval columns.
#' @export
compute_hierarchical_relative_percentile_ci <- function (.input, .baseline, .column, ...) {
assert_renaissance (.input, .check_index = FALSE, .check_total = FALSE, .check_metadata = FALSE)

# Subset baseline to reduce amount of filtering per benchmark.
baseline_for_vm <- .input |> filter (.data $ vm == .baseline)

# Return the baseline column values for one benchmark,
# split into one vector per run as the hierarchical
# interval computation expects.
baseline_for_benchmark <- function (benchmark) {
force (benchmark)
baseline <- baseline_for_vm |> filter (.data $ benchmark == .env $ benchmark)
# Split the benchmark subset, not the whole baseline vm subset.
# Using `baseline_for_vm` here would pool data across all benchmarks.
return (split (baseline |> pull ({{ .column }}), baseline $ run, drop = TRUE))
}

# Group by vm hash plus the vm_* metadata columns so the metadata
# survives into the summarized output, then compute one interval
# per vm and benchmark against the matching baseline data.
.input |>
group_by (.data $ vm, across (starts_with ('vm_')), .data $ benchmark) |>
reframe (compute_list_vectors_pair_hierarchical_percentile_ci (
split ({{ .column }}, .data $ run, drop = TRUE),
baseline_for_benchmark (cur_group () $ benchmark),
...))
}
Loading

0 comments on commit f36b943

Please sign in to comment.