From 8e93912e7f64a3b63ec3326f4fc429492732ad94 Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Thu, 9 Jan 2025 20:20:19 -0800
Subject: [PATCH] Add benchmarks for u32/u64 functions.

This allows us to see the effect of any specialized implementations for
`getrandom::u32` or `getrandom::u64`.

As expected, on Linux (which just uses the default implementation in
`utils.rs`) there is no change:

```
test bench_u32          ... bench:         196.50 ns/iter (+/- 4.85) = 20 MB/s
test bench_u32_via_fill ... bench:         198.25 ns/iter (+/- 1.78) = 20 MB/s
test bench_u64          ... bench:         196.95 ns/iter (+/- 2.99) = 40 MB/s
test bench_u64_via_fill ... bench:         197.62 ns/iter (+/- 2.24) = 40 MB/s
```

but when using the `rdrand` backend (which is specialized), there is a
measurable difference:

```
test bench_u32          ... bench:          16.84 ns/iter (+/- 0.09) = 250 MB/s
test bench_u32_via_fill ... bench:          18.40 ns/iter (+/- 0.28) = 222 MB/s
test bench_u64          ... bench:          16.62 ns/iter (+/- 0.06) = 500 MB/s
test bench_u64_via_fill ... bench:          17.70 ns/iter (+/- 0.08) = 470 MB/s
```

Signed-off-by: Joe Richey
---
 benches/buffer.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/benches/buffer.rs b/benches/buffer.rs
index 2899a3f7..0063a453 100644
--- a/benches/buffer.rs
+++ b/benches/buffer.rs
@@ -1,7 +1,10 @@
 #![feature(test, maybe_uninit_uninit_array_transpose)]
 extern crate test;
 
-use std::mem::MaybeUninit;
+use std::{
+    mem::{size_of, MaybeUninit},
+    slice,
+};
 
 // Call getrandom on a zero-initialized stack buffer
 #[inline(always)]
@@ -19,6 +22,53 @@ fn bench_fill_uninit() {
     test::black_box(buf);
 }
 
+#[bench]
+pub fn bench_u32(b: &mut test::Bencher) {
+    #[inline(never)]
+    fn inner() -> u32 {
+        getrandom::u32().unwrap()
+    }
+    b.bytes = 4;
+    b.iter(inner);
+}
+#[bench]
+pub fn bench_u32_via_fill(b: &mut test::Bencher) {
+    #[inline(never)]
+    fn inner() -> u32 {
+        let mut res = MaybeUninit::<u32>::uninit();
+        let dst: &mut [MaybeUninit<u8>] =
+            unsafe { slice::from_raw_parts_mut(res.as_mut_ptr().cast(), size_of::<u32>()) };
+        getrandom::fill_uninit(dst).unwrap();
+        unsafe { res.assume_init() }
+    }
+    b.bytes = 4;
+    b.iter(inner);
+}
+
+#[bench]
+pub fn bench_u64(b: &mut test::Bencher) {
+    #[inline(never)]
+    fn inner() -> u64 {
+        getrandom::u64().unwrap()
+    }
+    b.bytes = 8;
+    b.iter(inner);
+}
+
+#[bench]
+pub fn bench_u64_via_fill(b: &mut test::Bencher) {
+    #[inline(never)]
+    fn inner() -> u64 {
+        let mut res = MaybeUninit::<u64>::uninit();
+        let dst: &mut [MaybeUninit<u8>] =
+            unsafe { slice::from_raw_parts_mut(res.as_mut_ptr().cast(), size_of::<u64>()) };
+        getrandom::fill_uninit(dst).unwrap();
+        unsafe { res.assume_init() }
+    }
+    b.bytes = 8;
+    b.iter(inner);
+}
+
 // We benchmark using #[inline(never)] "inner" functions for two reasons:
 // - Avoiding inlining reduces a source of variance when running benchmarks.
 // - It is _much_ easier to get the assembly or IR for the inner loop.
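For context on what the `*_via_fill` benchmarks measure: on backends without a specialized word-sized source, `getrandom::u32`/`u64` fall back to filling a small uninitialized buffer and reassembling the integer, which is why the Linux numbers above are identical across the two variants. Below is a minimal sketch of that idea, using only the public `fill_uninit` API exercised in the diff; the helper name `u32_via_fill` is illustrative and is not the crate's internal `utils.rs` code.

```rust
use std::mem::MaybeUninit;

// Sketch of the non-specialized path: fill 4 uninitialized bytes via the
// public `fill_uninit` API, then reassemble them into a u32. The function
// name is illustrative, not the crate's actual internal implementation.
fn u32_via_fill() -> Result<u32, getrandom::Error> {
    let mut buf = [MaybeUninit::<u8>::uninit(); 4];
    // On success, `fill_uninit` has initialized every byte of `buf`.
    getrandom::fill_uninit(&mut buf)?;
    // SAFETY: all four bytes were initialized by the call above.
    let bytes = buf.map(|b| unsafe { b.assume_init() });
    Ok(u32::from_ne_bytes(bytes))
}

fn main() {
    println!("random u32: {:#010x}", u32_via_fill().unwrap());
}
```

The benchmarks themselves need a nightly toolchain because of `#![feature(test)]`, so something like `cargo +nightly bench` runs them; the `rdrand` numbers assume the backend is selected through the crate's opt-in backend cfg (e.g. `RUSTFLAGS='--cfg getrandom_backend="rdrand"'` with getrandom 0.3).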