From 03f725c3fe44492eaad7999a288f9ad82908d2ae Mon Sep 17 00:00:00 2001
From: shafin <shafin@google.com>
Date: Mon, 2 Dec 2024 11:08:14 -0800
Subject: [PATCH] Update all DeepVariant case-studies

PiperOrigin-RevId: 702025261
---
 docs/deepvariant-vg-case-study.md |  31 ++++-----
 docs/metrics.md                   | 107 +++++++++++++++++++-----------
 2 files changed, 80 insertions(+), 58 deletions(-)

diff --git a/docs/deepvariant-vg-case-study.md b/docs/deepvariant-vg-case-study.md
index 86a38ef6..24f0a551 100644
--- a/docs/deepvariant-vg-case-study.md
+++ b/docs/deepvariant-vg-case-study.md
@@ -4,7 +4,7 @@
 This is an example to run `vg giraffe`, so we can go from FASTQs --> BAM.
 
 For simplicity and consistency, we run the following with a
-[Google Cloud instance with 64 cores](deepvariant-details.md#command-for-a-cpu-only-machine-on-google-cloud-platform).
+[Google Cloud instance with 96 cores](deepvariant-details.md#command-for-a-cpu-only-machine-on-google-cloud-platform).
 
 I added more disks because 300G is not enough for the example below. I changed
 it to `--boot-disk-size "1000"`.
@@ -184,11 +184,11 @@ And then, run DeepVariant.
 [DeepVariant Case Study](deepvariant-case-study.md).)
 
 ```bash
-BIN_VERSION="1.7.0"
+BIN_VERSION="1.8.0"
 
 sudo docker pull google/deepvariant:"${BIN_VERSION}"
 
-time sudo docker run --rm \
+time sudo docker run \
   -v "${DATA_DIR}":"${DATA_DIR}" \
   -v "${PWD}:${PWD}" \
   google/deepvariant:"${BIN_VERSION}" \
@@ -204,9 +204,9 @@ time sudo docker run --rm \
 
 Stage                            | Time (minutes)
 -------------------------------- | -----------------
-make_examples                    | 101m31.676s
-call_variants                    | 215m33.631s
-postprocess_variants (with gVCF) | 24m44.242s
+make_examples                    | 59m19.845s
+call_variants                    | 49m41.643s
+postprocess_variants (with gVCF) | 7m46.195s
 
 
 ### Run hap.py
@@ -244,21 +244,16 @@ Output:
 ```
 Benchmarking Summary:
 Type Filter  TRUTH.TOTAL  TRUTH.TP  TRUTH.FN  QUERY.TOTAL  QUERY.FP  QUERY.UNK  FP.gt  FP.al  METRIC.Recall  METRIC.Precision  METRIC.Frac_NA  METRIC.F1_Score  TRUTH.TOTAL.TiTv_ratio  QUERY.TOTAL.TiTv_ratio  TRUTH.TOTAL.het_hom_ratio  QUERY.TOTAL.het_hom_ratio
-INDEL    ALL       504501    502283      2218       958181      1471     433079    913    351       0.995604          0.997199        0.451980         0.996400                     NaN                     NaN                   1.489759                   1.954212
-INDEL   PASS       504501    502283      2218       958181      1471     433079    913    351       0.995604          0.997199        0.451980         0.996400                     NaN                     NaN                   1.489759                   1.954212
-  SNP    ALL      3327496   3316374     11122      3820052      4177     497662   1686    344       0.996658          0.998743        0.130276         0.997699                2.102576                1.991054                   1.535137                   1.457635
-  SNP   PASS      3327496   3316374     11122      3820052      4177     497662   1686    344       0.996658          0.998743        0.130276         0.997699                2.102576                1.991054                   1.535137                   1.457635
+INDEL    ALL       504501    502210      2291       954974      1522     429900    956    362       0.995459          0.997101        0.450169         0.996279                     NaN                     NaN                   1.489759                   1.942299
+INDEL   PASS       504501    502210      2291       954974      1522     429900    956    362       0.995459          0.997101        0.450169         0.996279                     NaN                     NaN                   1.489759                   1.942299
+  SNP    ALL      3327496   3316336     11160      3823082      4229     500683   1696    356       0.996646          0.998727        0.130963         0.997686                2.102576                1.990152                   1.535137                   1.449299
+  SNP   PASS      3327496   3316336     11160      3823082      4229     500683   1696    356       0.996646          0.998727        0.130963         0.997686                2.102576                1.990152                   1.535137                   1.449299
 ```
 
-| Type  | TRUTH.TP | TRUTH.FN | QUERY.FP | METRIC.Recall | METRIC.Precision | METRIC.F1_Score |
-| ----- | -------- | -------- | -------- | ------------- | ---------------- | --------------- |
-| INDEL | 502283   | 2218     | 1471     | 0.995604      | 0.997199         | 0.9964          |
-| SNP   | 3316374  | 11122    | 4177     | 0.996658      | 0.998743         | 0.997699        |
-
 This can be compared with
-https://github.com/google/deepvariant/blob/r1.7/docs/metrics.md#accuracy.
+https://github.com/google/deepvariant/blob/r1.8/docs/metrics.md#accuracy.
 
 Which shows that `vg giraffe` improves F1:
 
-- Indel F1: 0.995998 --> 0.9964
-- SNP F1: 0.996237 --> 0.997699
+- Indel F1: 0.995945 --> 0.996279
+- SNP F1: 0.996213 --> 0.997686
diff --git a/docs/metrics.md b/docs/metrics.md
index ad45a5fb..fadb0c43 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -1,5 +1,17 @@
 # Runtime and accuracy metrics for all release models
 
+## Setup
+
+The runtime and accuracy metrics reported on this page were generated using
+`n2-standard-96` GCP instances, which have the following configuration:
+
+```bash
+GCP instance type: n2-standard-96
+CPUs: 96-core (vCPU)
+Memory: 384GiB
+GPUs: 0
+```
+
 ## WGS (Illumina)
 
 ### Runtime
@@ -9,11 +21,11 @@ Reported runtime is an average of 5 runs.
 
 Stage                            | Time (minutes)
 -------------------------------- | ------------------
-make_examples                    | 114m35.42s
-call_variants                    | 65m6.69s
-postprocess_variants (with gVCF) | 12m7.55s
-vcf_stats_report (optional)      | 9m9.16s
-total                            | 208m46.70s (3h28m46.70s)
+make_examples                    | 54m58.62s
+call_variants                    | 38m45.29s
+postprocess_variants (with gVCF) | 8m22.88s
+vcf_stats_report (optional)      | 5m37.52s
+total                            | 113m11.70s (1h53m11.70s)
 
 ### Accuracy
 
@@ -25,7 +37,7 @@ held out while training.
 | INDEL | 501653   | 2848     | 1289     | 0.994355      | 0.997541         | 0.995945        |
 | SNP   | 3306740  | 20756    | 4386     | 0.993762      | 0.998676         | 0.996213        |
 
-[See VCF stats report.](https://storage.googleapis.com/deepvariant/visual_reports/DeepVariant/1.7.0/WGS/deepvariant.output.visual_report.html)
+[See VCF stats report.](https://storage.googleapis.com/deepvariant/visual_reports/DeepVariant/1.8.0/WGS/deepvariant.output.visual_report.html)
 
 ## WES (Illumina)
 
@@ -36,11 +48,11 @@ Reported runtime is an average of 5 runs.
 
 Stage                            | Time (minutes)
 -------------------------------- | -----------------
-make_examples                    | 6m35.96s
-call_variants                    | 1m31.70s
-postprocess_variants (with gVCF) | 0m58.50s
-vcf_stats_report (optional)      | 0m7.67s
-total                            | 9m38.11s
+make_examples                    | 3m17.64s
+call_variants                    | 0m56.36s
+postprocess_variants (with gVCF) | 0m39.27s
+vcf_stats_report (optional)      | 0m4.93s
+total                            | 5m26.00s
 
 ### Accuracy
 
@@ -49,13 +61,26 @@ held out while training.
 
 | Type  | TRUTH.TP | TRUTH.FN | QUERY.FP | METRIC.Recall | METRIC.Precision | METRIC.F1_Score |
 | ----- | -------- | -------- | -------- | ------------- | ---------------- | --------------- |
-| INDEL | 1020     | 31       | 12       | 0.970504      | 0.988615         | 0.979476        |
-| SNP   | 24982    | 297      | 64       | 0.988251      | 0.997445         | 0.992827        |
+| INDEL | 1020     | 31       | 7        | 0.970504      | 0.993327         | 0.981783        |
+| SNP   | 24984    | 295      | 60       | 0.98833       | 0.997604         | 0.992946        |
 
-[See VCF stats report.](https://storage.googleapis.com/deepvariant/visual_reports/DeepVariant/1.7.0/WES/deepvariant.output.visual_report.html)
+[See VCF stats report.](https://storage.googleapis.com/deepvariant/visual_reports/DeepVariant/1.8.0/WES/deepvariant.output.visual_report.html)
 
 ## PacBio (HiFi)
 
+### Updated dataset in release 1.8.0
+
+In release 1.8.0, we have updated the PacBio test data from HG003 Sequel-II to
+the latest Revio data with SPRQ chemistry to showcase performance on the updated
+platform and chemistry. The numbers reported here are generated using the BAM
+file, which can be found at:
+
+```bash
+gs://deepvariant/pacbio-case-study-testdata/HG003.SPRQ.pacbio.GRCh38.nov2024.bam
+```
+
+It is also available [here](https://downloads.pacbcloud.com/public/revio/2024Q4/WGS/GIAB_trio/HG003/analysis/GRCh38.m84039_241002_000337_s3.hifi_reads.bc2020.bam).
+
 ### Runtime
 
 Runtime is on HG003 (all chromosomes).
@@ -63,11 +88,11 @@ Reported runtime is an average of 5 runs.
 
 Stage                            | Time (minutes)
 -------------------------------- | -------------------
-make_examples                    | 77m48.24s
-call_variants                    | 82m33.11s
-postprocess_variants (with gVCF) | 10m27.86s
-vcf_stats_report (optional)      | 10m1.56s
-total                            | 190m21.52s (3h10m21.52s)
+make_examples                    | 31m51.00s
+call_variants                    | 34m49.62s
+postprocess_variants (with gVCF) | 5m28.59s
+vcf_stats_report (optional)      | 5m36.49s
+total                            | 86m50.09s (1h26m50.09s)
 
 ### Accuracy
 
@@ -79,10 +104,11 @@ to run DeepVariant once.
 
 | Type  | TRUTH.TP | TRUTH.FN | QUERY.FP | METRIC.Recall | METRIC.Precision | METRIC.F1_Score |
 | ----- | -------- | -------- | -------- | ------------- | ---------------- | --------------- |
-| INDEL | 501727   | 2774     | 2643     | 0.994501      | 0.994968         | 0.994735        |
-| SNP   | 3324603  | 2892     | 2056     | 0.999131      | 0.999382         | 0.999257        |
+| INDEL | 500955   | 3546     | 3373     | 0.992971      | 0.993555         | 0.993263        |
+| SNP   | 3321825  | 5670     | 4263     | 0.998296      | 0.99872          | 0.998508        |
+
 
-[See VCF stats report.](https://storage.googleapis.com/deepvariant/visual_reports/DeepVariant/1.7.0/PACBIO/deepvariant.output.visual_report.html)
+[See VCF stats report.](https://storage.googleapis.com/deepvariant/visual_reports/DeepVariant/1.8.0/PACBIO/deepvariant.output.visual_report.html)
 
 ## ONT_R104
 
@@ -93,11 +119,11 @@ Reported runtime is an average of 5 runs.
 
 Stage                            | Time (minutes)
 -------------------------------- | --------------------
-make_examples                    | 125m50.85s
-call_variants                    | 107m45.55s
-postprocess_variants (with gVCF) | 11m30.19s
-vcf_stats_report (optional)      | 11m0.03s
-total                            | 258m4.89s (4h18m4.89s)
+make_examples                    | 53m25.60s
+call_variants                    | 55m24.86s
+postprocess_variants (with gVCF) | 7m17.83s
+vcf_stats_report (optional)      | 6m30.29s
+total                            | 127m56.44s (2h7m56.44s)
 
 ### Accuracy
 
@@ -106,10 +132,11 @@ truth), which was held out while training.
 
 | Type  | TRUTH.TP | TRUTH.FN | QUERY.FP | METRIC.Recall | METRIC.Precision | METRIC.F1_Score |
 | ----- | -------- | -------- | -------- | ------------- | ---------------- | --------------- |
-| INDEL | 443724   | 60777    | 42938    | 0.87953       | 0.914541         | 0.896694        |
-| SNP   | 3319440  | 8044     | 5005     | 0.997583      | 0.998495         | 0.998039        |
+| INDEL | 452010   | 52491    | 40289    | 0.895955      | 0.920501         | 0.908062        |
+| SNP   | 3321452  | 6032     | 3942     | 0.998187      | 0.998815         | 0.998501        |
 
-[See VCF stats report.](https://storage.googleapis.com/deepvariant/visual_reports/DeepVariant/1.7.0/ONT_R104/deepvariant.output.visual_report.html)
+
+[See VCF stats report.](https://storage.googleapis.com/deepvariant/visual_reports/DeepVariant/1.8.0/ONT_R104/deepvariant.output.visual_report.html)
 
 ## Hybrid (Illumina + PacBio HiFi)
 
@@ -119,12 +146,12 @@ Runtime is on HG003 (all chromosomes).
 Reported runtime is an average of 5 runs.
 
 Stage                            | Time (minutes)
--------------------------------- | -------------------
-make_examples                    | 160m29.82s
-call_variants                    | 90m45.03s
-postprocess_variants (with gVCF) | 6m43.66s
-vcf_stats_report (optional)      | 9m31.77s
-total                            | 278m59.35s (4h38m59.35s)
+-------------------------------- | ------------------
+make_examples                    | 71m52.43s
+call_variants                    | 51m42.37s
+postprocess_variants (with gVCF) | 4m6.13s
+vcf_stats_report (optional)      | 5m18.39s
+total                            | 151m34.49s (2h31m34.49s)
 
 ### Accuracy
 
@@ -136,7 +163,7 @@ out while training the hybrid model.
 | INDEL | 503109   | 1392     | 2636     | 0.997241      | 0.995022         | 0.99613         |
 | SNP   | 3324179  | 3316     | 2049     | 0.999003      | 0.999384         | 0.999194        |
 
-[See VCF stats report.](https://storage.googleapis.com/deepvariant/visual_reports/DeepVariant/1.7.0/HYBRID/deepvariant.output.visual_report.html)
+[See VCF stats report.](https://storage.googleapis.com/deepvariant/visual_reports/DeepVariant/1.8.0/HYBRID/deepvariant.output.visual_report.html)
 
 ## Inspect outputs that produced the metrics above
 
@@ -152,7 +179,7 @@ https://42basepairs.com/browse/gs/deepvariant/case-study-outputs
 ## How to reproduce the metrics on this page
 
 For simplicity and consistency, we report runtime with a
-[CPU instance with 64 CPUs](deepvariant-details.md#command-for-a-cpu-only-machine-on-google-cloud-platform)
+[CPU instance with 96 CPUs](deepvariant-details.md#command-for-a-cpu-only-machine-on-google-cloud-platform).
 This is NOT the fastest or cheapest configuration.
 
 Use `gcloud compute ssh` to log in to the newly created instance.
@@ -161,7 +188,7 @@ Download and run any of the following case study scripts:
 
 ```
 # Get the script.
-curl -O https://raw.githubusercontent.com/google/deepvariant/r1.7/scripts/inference_deepvariant.sh
+curl -O https://raw.githubusercontent.com/google/deepvariant/r1.8/scripts/inference_deepvariant.sh
 
 # WGS
 bash inference_deepvariant.sh --model_preset WGS
@@ -184,5 +211,5 @@ DeepVariant. The runtime numbers reported above are the average of 5 runs each.
 The accuracy metrics come from the hap.py summary.csv output file.
 The runs are deterministic so all 5 runs produced the same output.
 
-[CPU instance with 64 CPUs]: deepvariant-details.md#command-for-a-cpu-only-machine-on-google-cloud-platform
+[CPU instance with 96 CPUs]: deepvariant-details.md#command-for-a-cpu-only-machine-on-google-cloud-platform