From c2fddcc0136a06dbf0198616d430a6754653a4c8 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 18 Oct 2019 00:36:16 +0300 Subject: [PATCH 01/15] standard lazy types --- text/0000-standard-lazy-types.md | 416 +++++++++++++++++++++++++++++++ 1 file changed, 416 insertions(+) create mode 100644 text/0000-standard-lazy-types.md diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md new file mode 100644 index 00000000000..e24201a7047 --- /dev/null +++ b/text/0000-standard-lazy-types.md @@ -0,0 +1,416 @@ +- Feature Name: `once_cell` +- Start Date: 2019-10-17 +- RFC PR: [rust-lang/rfcs#0000](https://github.com/rust-lang/rfcs/pull/0000) +- Rust Issue: [rust-lang/rust#0000](https://github.com/rust-lang/rust/issues/0000) + +# Summary +[summary]: #summary + +Add support for lazy initialized values to standard library, effectively superseding the popular [`lazy_static`] crate. + +```rust +use std::sync::Lazy; + +// `BACKTRACE` implements `Deref>` and is initialized +// on the first access +static BACKTRACE: Lazy> = Lazy::new(|| { + std::env::var("RUST_BACKTRACE").ok() +}); +``` + +# Motivation +[motivation]: #motivation + +Working with lazy initialized values is ubiquitous, [`lazy_static`] and [`lazycell`] crates have more than 20 million downloads combined. +Although some of the popularity of `lazy_static` can be attributed to current limitations of constant evaluation in Rust, there are many cases when even perfect const fn can't replace lazy values. + +At the same time, working with lazy values in Rust is not easy: + +* Implementing them requires moderately tricky unsafe code. Multiple soundness holes were found in the implementations from crates.io. +* C++ and Java provide language-level delayed initialization for static values, while Rust requires explicit code to handle runtime-initialization. +* Rust borrowing rules require a special pattern when implementing lazy fields. 
+ +While `lazy_static` is implemented using macros, to work-around language limitations, today it is possible to implement similar functionality without resorting to macros, as a natural combination of two features: +* lazy values +* `static` keyword + +We can have a single canonical API for a commonly used tricky unsafe concept, so we probably should have it! + +# Guide-level explanation +[guide-level-explanation]: #guide-level-explanation + +Lazy values are a form of interior mutability. +The key observation is that restricting a cell to single assignment allows to safely return a shared reference to the contents of the cell. +Such cell is called `OnceCell`, by analogy with `std::sync::Once` type. The core API is as follows: + +```rust +pub struct OnceCell { ... } + +impl OnceCell { + /// Creates a new empty cell. + pub const fn new() -> OnceCell; + + /// Gets the reference to the underlying value. + /// + /// Returns `None` if the cell is empty. + pub fn get(&self) -> Option<&T>; + + /// Sets the contents of this cell to `value`. + /// + /// Returns `Ok(())` if the cell was empty and `Err(value)` if it was + /// full. + pub fn set(&self, value: T) -> Result<(), T>; + + /// Gets the contents of the cell, initializing it with `f` + /// if the cell was empty. + /// + /// # Panics + /// + /// If `f` panics, the panic is propagated to the caller, and the cell + /// remains uninitialized. + /// + /// It is an error to reentrantly initialize the cell from `f`. Doing + /// so results in a panic. + pub fn get_or_init(&self, f: F) -> &T + where + F: FnOnce() -> T, + ; + + /// Gets the contents of the cell, initializing it with `f` if + /// the cell was empty. If the cell was empty and `f` failed, an + /// error is returned. + /// + /// # Panics + /// + /// If `f` panics, the panic is propagated to the caller, and the cell + /// remains uninitialized. + /// + /// It is an error to reentrantly initialize the cell from `f`. Doing + /// so results in a panic. 
+ pub fn get_or_try_init(&self, f: F) -> Result<&T, E> + where + F: FnOnce() -> Result, + ; +} +``` + +Notable features of the API: + +* `OnceCell` is created empty, by a const fn. +* Initialization succeeds at most once. +* `get_or_init` and `get_or_try_init` methods can be used to conveniently initialize a cell. +* `get_` family of methods return `&T`. + +Similarly to other interior mutability primitives, `OnceCell` comes in two flavors: + +* Non thread-safe `std::cell::OnceCell`. +* Thread-safe `std::sync::OnceCell`. + +Here's how `OnceCell` can be used to implement lazy-initialized global data: + +```rust +use std::{sync::{Mutex, OnceCell}, collections::HashMap}; + +fn global_data() -> &'static Mutex> { + static INSTANCE: OnceCell>> = OnceCell::new(); + INSTANCE.get_or_init(|| { + let mut m = HashMap::new(); + m.insert(13, "Spica".to_string()); + m.insert(74, "Hoyten".to_string()); + Mutex::new(m) + }) +} +``` + +Here's how `OnceCell` can be used to implement a lazy field: + +```rust +use std::{fs, io, path::PathBuf, cell::OnceCell}; + +struct Ctx { + config_path: PathBuf, + config: OnceCell, +} + +impl Ctx { + pub fn get_config(&self) -> Result<&str, io::Error> { + let cfg = self.config.get_or_try_init(|| { + fs::read_to_string(&self.config_path) + })?; + Ok(cfg.as_str()) + } +} +``` + +We also provide a more convenient but less powerful `Lazy` wrapper around `OnceCell`, which allows to specify the initializing closure at creation time: + +```rust +pub struct Lazy T> { ... } + +impl Lazy { + /// Creates a new lazy value with the given initializing function. + pub const fn new(init: F) -> Lazy; +} + +impl T> Lazy { + /// Forces the evaluation of this lazy value and returns a reference to + /// the result. + /// + /// This is equivalent to the `Deref` impl, but is explicit. 
+ pub fn force(this: &Lazy) -> &T; +} + +impl T> Deref for Lazy { + type Target = T; + + fn deref(&self) -> &T; +} +``` + +`Lazy` directly replaces `lazy_static!`: + +```rust +use std::{sync::{Mutex, Lazy}, collections::HashMap}; + +static GLOBAL_DATA: Lazy>> = Lazy::new(|| { + let mut m = HashMap::new(); + m.insert(13, "Spica".to_string()); + m.insert(74, "Hoyten".to_string()); + Mutex::new(m) +}); +``` + +Moreover, once `#[thread_local]` attribute is stable, `Lazy` will supplant `std::thread_local!` as well: + +```rust +use std::cell::{RefCell, Lazy}; + +#[thread_local] +pub static FOO: Lazy> = Lazy::new(|| RefCell::new(1)); +``` + +Unlike `lazy_static!`, `Lazy` can be used used for locals: + +```rust +use std::cell::Lazy; + +fn main() { + let ctx = vec![1, 2, 3]; + let thunk = Lazy::new(|| { + ctx.iter().sum::() + }); + assert_eq!(*thunk, 6); +} +``` + +# Reference-level explanation +[reference-level-explanation]: #reference-level-explanation + +The proposed API is directly copied from [`once_cell`] crate. + +Altogether, this RFC proposes to add four types: + +* `std::cell::OnceCell`, `std::cell::Lazy` +* `std::sync::OnceCell`, `std::sync::Lazy` + +`OnceCell` is an important core primitive. +`Lazy` can be stabilized separately from `OnceCell`, or it can be omitted from the standard library altogether. +However, it provides significantly nicer ergonomics for the common use-case of static lazy values. + +Non thread-safe flavor is implemented by storing an `UnsafeCell>`: + +```rust +pub struct OnceCell { + // Invariant: written to at most once. + inner: UnsafeCell>, +} +``` + +The implementation is mostly straightforward. +The only tricky bit is that reentrant initialization should be explicitly forbidden. 
+That is, the following program panics: + +```rust +let x: OnceCell> = OnceCell::new(); +let dangling_ref: Cell> = Cell::new(None); +x.get_or_init(|| { + let r = x.get_or_init(|| Box::new(92)); + dangling_ref.set(Some(r)); + Box::new(62) +}); +println!("would be use after free: {:?}", dangling_ref.get().unwrap()); +``` + +Non thread-safe flavor can be added to `core` as well. + +The thread-safe variant is implemented similarly to `std::sync::Once`. +Crucially, it has support for blocking: if many threads call `get_or_init` concurrently, only one will be able to execute the closure, while all other threads will block. +For this reason, `std::sync::OnceCell` can not be provided in core. +Even the minimal `OnceCell::::set` API requires support for blocking, because one can't atomically set arbitrary `T`. + +# Drawbacks +[drawbacks]: #drawbacks + +* This is a moderately large addition to stdlib, there's a chance we do something wrong. + This can be mitigated by piece-wise stabilization (in particular, `Lazy` convenience types are optional) and the fact that API is battle-tested via `once_cell` crate. + +* The design of `Lazy` type uses default type-parameter as a work-around for the absence of type-inference of statics. + +* We use the same name for unsync and sync types, which might be confusing. + +# Rationale and alternatives +[rationale-and-alternatives]: #rationale-and-alternatives + +## Why not `Lazy` as a primitive? + +On the first look, it may seem like we don't need `OnceCell`, and should only provide `Lazy`. +The critical drawback of `Lazy` is that it's not always possible to provide the closure at creation time. + +This is important for lazy fields: + +```rust +struct Ctx { + config_path: PathBuf, + config: Lazy, +} + +impl Ctx { + pub fn new(config_path: PathBuf) -> Ctx { + Ctx { + config_path, + config: Lazy::new(|| { + // We would like to write something like + // `fs::read_to_string(&self.config_path)` + // here, but we can't have access to `self` + ??? 
+ }) + } + } +} +``` + +Or for singletons, initialized with parameters: + +```rust +use std::{env, io, sync::OnceCell}; + +#[derive(Debug)] +pub struct Logger { ... } + +static INSTANCE: OnceCell = OnceCell::new(); +impl Logger { + pub fn global() -> &'static Logger { + INSTANCE.get().expect("logger is not initialized") + } + fn from_cli(args: env::Args) -> Result { ... } +} + +fn main() { + let logger = Logger::from_cli(env::args()).unwrap(); + + // Note how we use locally-created value for initialization. + INSTANCE.set(logger).unwrap(); + + // use `Logger::global()` from now on +} +``` + +## Why `OnceCell` as a primitive? + +It is possible to imagine a type, slightly more general than `OnceCell`: + +```rust +struct OnceFlipCell { ... } + +impl OnceFlipCell { + const fn new(initial_value: U) -> OnceFlipCell; + + fn get_or_init V>(&self, f: F) -> &V; +} + +type OnceCell = OnceFlipCell<(), T>; +``` + +That is, we can store some initial state in the cell and consume it during initialization. +In practice, such flexibility seems to be rarely required. +Even if we add a type, similar to `OnceFlipCell`, having a dedicated `OnceCell` (which *could* be implemented on top of `OnceFlipCell`) type simplifies common use-case. + +## Poisoning + +As a cell can be empty or fully initialized, the proposed API does not use poisoning. +If an initialization function panics, the cell remains uninitialized. +An alternative would be to add poisoning, which will make all subsequent `get` calls to panic. + +Similarly, because `OnceCell` provides strong exception safety guarantee, it implements `UnwindSafe`: + +```rust +impl UnwindSafe for OnceCell {} +impl RefUnwindSafe for OnceCell {} +``` + +## Default type parameter on `Lazy` + +`Lazy` is defined with default type parameter. + +```rust +pub struct Lazy T> { ... } +``` + +This is important to make using `Lazy` in static contexts convenient. 
+Without this default, the user would have to type `T` type twice: + +```rust +static GLOBAL_DATA: Lazy>, fn() -> Mutex> + = Lazy::new(|| ... ); +``` + +If we allow type inference in statics, this could be shortened to + +```rust +static GLOBAL_DATA: Lazy>, _> + = Lazy::new(|| ... ); +``` + +There are two drawbacks of using fn pointer type: + +* fn pointers are not ZSTs, so we waste one pointer per static lazy value. + Lazy locals will generally rely on type-inference and will use more specific closure type. +* Specifying type for local lazy value might be tricky: `let x: Lazy = Lazy::new(|| closed_over_var)` fails with type error, the correct syntax is `let x: Lazy = Lazy::new(|| closed_over_var)`. + +## Only thread-safe flavor + +It is possible to add only `sync` version of the types, as they are the most useful. +However, this will be against zero cost abstractions spirit. +Additionally, non thread-safe version is required to replace `thread_local!` macro without imposing synchronization. + +# Prior art +[prior-art]: #prior-art + +The primary bit of prior art here is the [`once_cell`] library, which itself draws on multiple sources: + +* [double-checked-cell](https://crates.io/crates/double-checked-cell) +* [lazy-init](https://crates.io/crates/lazy-init) +* [lazycell](https://crates.io/crates/lazycell) +* [mitochondria](https://crates.io/crates/mitochondria) +* [lazy_static](https://crates.io/crates/lazy_static) + +Many languages provide library-defined lazy values, for example [Kotlin](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin/lazy.html#kotlin$lazy(kotlin.Function0((kotlin.lazy.T)))). +Typically, a lazy value is just a wrapper around closure. +This design doesn't always work in Rust, as closing over `self` runs afoul of the borrow checker, we need a more primitive `OnceCell` type. + +# Unresolved questions +[unresolved-questions]: #unresolved-questions + +- What is the best naming/place for these types? +- What is the best naming scheme for methods? 
Is it `get_or_try_init` or `try_inert_with`? +- Is the `F = fn() -> T` hack worth it? + +# Future possibilities +[future-possibilities]: #future-possibilities + +* Once `#[thread_local]` attribute is stable, `cell::Lazy` can serve as a replacement for `std::thread_local!` macro. +* Supporting type inference in constants might allow us to drop the default type parameter on `Lazy`. + +[`lazy_static`]: https://crates.io/crates/lazy_static +[`lazycell`]: https://crates.io/crates/lazycell +[`once_cell`]: https://crates.io/crates/once_cell From 0d2f664422db665d76c77900134e8d2079000b19 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 18 Oct 2019 18:42:35 +0300 Subject: [PATCH 02/15] Apply suggestions from code review Co-Authored-By: Mazdak Farrokhzad --- text/0000-standard-lazy-types.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index e24201a7047..457c24686ba 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -22,7 +22,7 @@ static BACKTRACE: Lazy> = Lazy::new(|| { [motivation]: #motivation Working with lazy initialized values is ubiquitous, [`lazy_static`] and [`lazycell`] crates have more than 20 million downloads combined. -Although some of the popularity of `lazy_static` can be attributed to current limitations of constant evaluation in Rust, there are many cases when even perfect const fn can't replace lazy values. +Although some of the popularity of `lazy_static` can be attributed to current limitations of constant evaluation in Rust, there are many cases when even perfect `const fn` can't replace lazy values. At the same time, working with lazy values in Rust is not easy: @@ -333,7 +333,7 @@ type OnceCell = OnceFlipCell<(), T>; That is, we can store some initial state in the cell and consume it during initialization. In practice, such flexibility seems to be rarely required. 
-Even if we add a type, similar to `OnceFlipCell`, having a dedicated `OnceCell` (which *could* be implemented on top of `OnceFlipCell`) type simplifies common use-case. +Even if we add a type, similar to `OnceFlipCell`, having a dedicated `OnceCell` (which *could* be implemented on top of `OnceFlipCell`) type simplifies a common use-case. ## Poisoning @@ -380,7 +380,7 @@ There are two drawbacks of using fn pointer type: ## Only thread-safe flavor It is possible to add only `sync` version of the types, as they are the most useful. -However, this will be against zero cost abstractions spirit. +However, this would be against zero cost abstractions spirit. Additionally, non thread-safe version is required to replace `thread_local!` macro without imposing synchronization. # Prior art From 34c70a4d7956f7d42b51222f6b6c9d5dc4756828 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 21 Oct 2019 11:10:35 +0300 Subject: [PATCH 03/15] remove numbers --- text/0000-standard-lazy-types.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index 457c24686ba..873def9ed49 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -21,7 +21,7 @@ static BACKTRACE: Lazy> = Lazy::new(|| { # Motivation [motivation]: #motivation -Working with lazy initialized values is ubiquitous, [`lazy_static`] and [`lazycell`] crates have more than 20 million downloads combined. +Working with lazy initialized values is ubiquitous, [`lazy_static`] and [`lazycell`] crates are used throughout the ecosystem. Although some of the popularity of `lazy_static` can be attributed to current limitations of constant evaluation in Rust, there are many cases when even perfect `const fn` can't replace lazy values. 
At the same time, working with lazy values in Rust is not easy: From fe46a5040dde6406766801787d3321c3b8a1309a Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 21 Oct 2019 11:20:55 +0300 Subject: [PATCH 04/15] note that thread_local is not coming soon --- text/0000-standard-lazy-types.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index 873def9ed49..7b8bc2354cc 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -180,7 +180,7 @@ static GLOBAL_DATA: Lazy>> = Lazy::new(|| { }); ``` -Moreover, once `#[thread_local]` attribute is stable, `Lazy` will supplant `std::thread_local!` as well: +Moreover, once `#[thread_local]` attribute is stable (which is not on the road to stabilization yet), `Lazy` will supplant `std::thread_local!` as well: ```rust use std::cell::{RefCell, Lazy}; From 07c3c59a969d32b3b30075122887f3de8e29f00c Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 21 Oct 2019 11:47:15 +0300 Subject: [PATCH 05/15] specify how we can add non-blocking `set` to libcore --- text/0000-standard-lazy-types.md | 45 ++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index 7b8bc2354cc..35bb1bbda25 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -245,8 +245,7 @@ Non thread-safe flavor can be added to `core` as well. The thread-safe variant is implemented similarly to `std::sync::Once`. Crucially, it has support for blocking: if many threads call `get_or_init` concurrently, only one will be able to execute the closure, while all other threads will block. -For this reason, `std::sync::OnceCell` can not be provided in core. -Even the minimal `OnceCell::::set` API requires support for blocking, because one can't atomically set arbitrary `T`. 
+For this reason, most of `std::sync::OnceCell` API can not be provided in `core`. # Drawbacks [drawbacks]: #drawbacks @@ -335,6 +334,48 @@ That is, we can store some initial state in the cell and consume it during initi In practice, such flexibility seems to be rarely required. Even if we add a type, similar to `OnceFlipCell`, having a dedicated `OnceCell` (which *could* be implemented on top of `OnceFlipCell`) type simplifies a common use-case. +## Variations of `set` + +The RFC proposes "obvious" signature for the `set` method: + +```rust +fn set(&self, value: T) -> Result<(), T>; +``` + +Note, however, that `set` establishes an invariant that the cell is initialized, so a more precise signature would be + +```rust +fn set(&self, value: T) -> (&T, Option<T>); +``` + +To be able to return a reference, `set` might need to block a thread. +For example, if two threads call `set` concurrently, one of them needs to block while the other moves the value into the cell. +It is possible to provide a non-blocking alternative to `set`: + +```rust +fn try_set(&self, value: T) -> Result<&T, (Option<&T>, T)> +``` + +That is, if value is set successfully, a reference is returned. +Otherwise, the cell is either fully initialized, and a reference is returned as well, or the cell is being initialized, and no valid reference exists yet. + +## Support for `no_std` + +The RFC proposes to add `cell::OnceCell` and `cell::Lazy` to `core`, while keeping `sync::OnceCell` and `sync::Lazy` `std`-only. +However, there's a subset of `sync::OnceCell` that can be provided in `core`: + +```rust +impl<T> OnceCell<T> { + const fn new() -> OnceCell<T>; + fn get(&self) -> Option<&T>; + fn try_set(&self, value: T) -> Result<&T, (Option<&T>, T)> +} +``` + +It is possible because, while `OnceCell` needs block for full API, its internal state can be implemented as a single `AtomicUsize`, so the `core` part does not need to know about blocking. +It is unclear if this API would be significantly useful. 
+In particular, the guarantees of non-blocking `set` are pretty weak, and are not enough to implement the `Lazy` wrapper. + ## Poisoning As a cell can be empty or fully initialized, the proposed API does not use poisoning. From cc9aee8447d7c33a054c2b9a56f28711d9348438 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 21 Oct 2019 13:07:02 +0300 Subject: [PATCH 06/15] specify that sync::OnceCell deadlocks when initialized reentrantly --- text/0000-standard-lazy-types.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index 35bb1bbda25..50481913a92 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -70,7 +70,7 @@ impl OnceCell { /// remains uninitialized. /// /// It is an error to reentrantly initialize the cell from `f`. Doing - /// so results in a panic. + /// so results in a panic or a deadlock. pub fn get_or_init(&self, f: F) -> &T where F: FnOnce() -> T, @@ -86,7 +86,7 @@ impl OnceCell { /// remains uninitialized. /// /// It is an error to reentrantly initialize the cell from `f`. Doing - /// so results in a panic. + /// so results in a panic or a deadlock. pub fn get_or_try_init(&self, f: F) -> Result<&T, E> where F: FnOnce() -> Result, @@ -246,6 +246,8 @@ Non thread-safe flavor can be added to `core` as well. The thread-safe variant is implemented similarly to `std::sync::Once`. Crucially, it has support for blocking: if many threads call `get_or_init` concurrently, only one will be able to execute the closure, while all other threads will block. For this reason, most of `std::sync::OnceCell` API can not be provided in `core`. +In the `sync` case, reliably panicking on re-entrant initialization is not trivial. +For this reason, the implementaion would simply deadlock, with a note that a deadlock might be elevated to panic in the future. 
# Drawbacks [drawbacks]: #drawbacks From 474a8bd0ee3766c3ec527099f0b15908377e3a7a Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 21 Oct 2019 16:11:08 +0300 Subject: [PATCH 07/15] clarify that thread_local might not work out --- text/0000-standard-lazy-types.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index 50481913a92..3f3f155223d 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -180,7 +180,7 @@ static GLOBAL_DATA: Lazy>> = Lazy::new(|| { }); ``` -Moreover, once `#[thread_local]` attribute is stable (which is not on the road to stabilization yet), `Lazy` will supplant `std::thread_local!` as well: +Moreover, once `#[thread_local]` attribute is stable, `Lazy` might supplant `std::thread_local!` as well: ```rust use std::cell::{RefCell, Lazy}; @@ -203,6 +203,8 @@ fn main() { } ``` +However, `#[thread_local]` attribute is pretty far from stabilization at the moment, and due to the required special handling of destructors, it's unclear if just using `cell::Lazy` will work out. + # Reference-level explanation [reference-level-explanation]: #reference-level-explanation From d459f2c8da3811dcd12169d985d3fe3e60b00d9c Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 21 Oct 2019 16:11:26 +0300 Subject: [PATCH 08/15] slightly more precise wording --- text/0000-standard-lazy-types.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index 3f3f155223d..20892be141e 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -255,7 +255,7 @@ For this reason, the implementaion would simply deadlock, with a note that a dea [drawbacks]: #drawbacks * This is a moderately large addition to stdlib, there's a chance we do something wrong. 
- This can be mitigated by piece-wise stabilization (in particular, `Lazy` convenience types are optional) and the fact that API is battle-tested via `once_cell` crate. + This can be mitigated by piece-wise stabilization (in particular, `Lazy` convenience types are optional) and the fact that API is tested in the crates.io ecosystem via `once_cell` crate. * The design of `Lazy` type uses default type-parameter as a work-around for the absence of type-inference of statics. From 77ff0800a1939eafcb7b592f68694c0e0f77ece9 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 26 Oct 2019 21:27:00 +0300 Subject: [PATCH 09/15] avoid tearing of thread_locals --- text/0000-standard-lazy-types.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index 20892be141e..d1dc5dd95f1 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -189,6 +189,8 @@ use std::cell::{RefCell, Lazy}; pub static FOO: Lazy> = Lazy::new(|| RefCell::new(1)); ``` +However, `#[thread_local]` attribute is pretty far from stabilization at the moment, and due to the required special handling of destructors, it's unclear if just using `cell::Lazy` will work out. + Unlike `lazy_static!`, `Lazy` can be used used for locals: ```rust @@ -203,8 +205,6 @@ fn main() { } ``` -However, `#[thread_local]` attribute is pretty far from stabilization at the moment, and due to the required special handling of destructors, it's unclear if just using `cell::Lazy` will work out. 
- # Reference-level explanation [reference-level-explanation]: #reference-level-explanation From 19f6961c2db85e486ee870e5ed86cd155c0683a8 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 9 Nov 2019 15:00:14 +0300 Subject: [PATCH 10/15] Fix typo --- text/0000-standard-lazy-types.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index d1dc5dd95f1..9522afd2678 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -191,7 +191,7 @@ pub static FOO: Lazy> = Lazy::new(|| RefCell::new(1)); However, `#[thread_local]` attribute is pretty far from stabilization at the moment, and due to the required special handling of destructors, it's unclear if just using `cell::Lazy` will work out. -Unlike `lazy_static!`, `Lazy` can be used used for locals: +Unlike `lazy_static!`, `Lazy` can be used for locals: ```rust use std::cell::Lazy; From b78c4165215cc0e854362df872193a66ddb8b41e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 12 Jan 2020 16:01:33 +0100 Subject: [PATCH 11/15] Mention that spinlocks are deliberately omitted --- text/0000-standard-lazy-types.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index 9522afd2678..6686d2dcb62 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -249,7 +249,7 @@ The thread-safe variant is implemented similarly to `std::sync::Once`. Crucially, it has support for blocking: if many threads call `get_or_init` concurrently, only one will be able to execute the closure, while all other threads will block. For this reason, most of `std::sync::OnceCell` API can not be provided in `core`. In the `sync` case, reliably panicking on re-entrant initialization is not trivial. 
-For this reason, the implementaion would simply deadlock, with a note that a deadlock might be elevated to panic in the future. +For this reason, the implementation would simply deadlock, with a note that a deadlock might be elevated to a panic in the future. # Drawbacks [drawbacks]: #drawbacks @@ -376,10 +376,14 @@ impl OnceCell { } ``` -It is possible because, while `OnceCell` needs block for full API, its internal state can be implemented as a single `AtomicUsize`, so the `core` part does not need to know about blocking. +It is possible because, while `OnceCell` needs blocking for full API, its internal state can be implemented as a single `AtomicUsize`, so the `core` part does not need to know about blocking. It is unclear if this API would be significantly useful. In particular, the guarantees of non-blocking `set` are pretty weak, and are not enough to implement the `Lazy` wrapper. +While it is possible to implement blocking in `#[no_std]` via a spin lock, we explicitly choose not to do so. +Spin locks are a sharp tool, which should only be used in specific circumstances (namely, when you have full control over thread scheduling). +`#[no_std]` code might end up in user space applications with preemptive scheduling, where unbounded spin locks are inappropriate. + ## Poisoning As a cell can be empty or fully initialized, the proposed API does not use poisoning. 
From 4dc732ec55ac9e2a19cd41baf9b8d7e8251a99ea Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 12 Jan 2020 16:41:56 +0100 Subject: [PATCH 12/15] Note Consume as an alternative to acquire --- text/0000-standard-lazy-types.md | 43 ++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index 6686d2dcb62..76136bbadfd 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -432,6 +432,48 @@ It is possible to add only `sync` version of the types, as they are the most use However, this would be against zero cost abstractions spirit. Additionally, non thread-safe version is required to replace `thread_local!` macro without imposing synchronization. +## Synchronization Guarantees + +In theory, it is possible to specify two different synchronization guarantees for `get` operation, release/acquire or release/consume. +They differ in how they treat side effects. +If thread **A** executes `get_or_init(f)`, and thread **B** executes `get` and observes the value, release/acquire guarantees that **B** also observes side-effects of `f`. + +Here's a program which allows to observe the difference: + +```rust +static FLAG: AtomicBool = AtomicBool::new(false); +static CELL: OnceCell<()> = OnceCell::new(); + +// thread1 +CELL.get_or_init(|| FLAG.store(true, Relaxed)); + +// thread2 +if CELL.get().is_some() { + assert!(FLAG.load(Relaxed)) +} +``` + +Under release/acquire, the assert never fires. +Under release/consume, it might fire. + +Release/consume can potentially be implemented more efficiently on weak memory model architectures. 
+However, the situation with `consume` ordering is cloudy right now: + +* [nobody knows what it actually means](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0371r0.html), +* [but people rely on it in practice for performance](https://docs.rs/crossbeam-utils/0.7.0/crossbeam_utils/atomic/trait.AtomicConsume.html#tymethod.load_consume). + +We can do one of the following: + +1. Specify and implement `acquire` ordering, +2. Specify `consume` but implement `acquire` (or hack `consume` in an implementation-defined manner) with the hope to make implementation more efficient later. +3. Specify and implement `acquire`, but provide additional API which can take `Ordering` as an argument. + +Option two seems the most promising: + +* it is forward compatible with specifying `acquire` later, +* for typical `OnceCell` use-cases, `consume` should be enough. + For guaranteeing side effects, `std::sync::Once` may be used instead. + # Prior art [prior-art]: #prior-art @@ -453,6 +495,7 @@ This design doesn't always work in Rust, as closing over `self` runs afoul of th - What is the best naming/place for these types? - What is the best naming scheme for methods? Is it `get_or_try_init` or `try_inert_with`? - Is the `F = fn() -> T` hack worth it? +- Which synchronization guarantee should we pick? 
# Future possibilities [future-possibilities]: #future-possibilities From 2b31697148ef44a1db6c1a116db6537fe6169a08 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 12 Jan 2020 16:45:30 +0100 Subject: [PATCH 13/15] Mention conquer-once --- text/0000-standard-lazy-types.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index 76136bbadfd..406ccf2812d 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -384,6 +384,8 @@ While it is possible to implement blocking in `#[no_std]` via a spin lock, we ex Spin locks are a sharp tool, which should only be used in specific circumstances (namely, when you have full control over thread scheduling). `#[no_std]` code might end up in user space applications with preemptive scheduling, where unbounded spin locks are inappropriate. +A spin-lock based implementation of `OnceCell` is provided on crates.io in the [`conquer-once`] crate. + ## Poisoning As a cell can be empty or fully initialized, the proposed API does not use poisoning. 
@@ -506,3 +508,4 @@ This design doesn't always work in Rust, as closing over `self` runs afoul of th [`lazy_static`]: https://crates.io/crates/lazy_static [`lazycell`]: https://crates.io/crates/lazycell [`once_cell`]: https://crates.io/crates/once_cell +[`conquer-once`]: https://github.com/oliver-giersch/conquer-once From 93231ebb1495f38b36e9d00ac25d8e7ef51ff5ec Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 11 Dec 2022 22:19:18 -0500 Subject: [PATCH 14/15] Apply suggestions from tgross35's code review - Update LazyCell/LazyLock/OnceCell/OnceLock usage - Clarify stance on atomics - Some minor flow updates Co-authored-by: bl-ue <54780737+bl-ue@users.noreply.github.com> --- text/0000-standard-lazy-types.md | 75 +++++++++++++------------------- 1 file changed, 30 insertions(+), 45 deletions(-) diff --git a/text/0000-standard-lazy-types.md b/text/0000-standard-lazy-types.md index 406ccf2812d..5025b0469fc 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/0000-standard-lazy-types.md @@ -30,9 +30,7 @@ At the same time, working with lazy values in Rust is not easy: * C++ and Java provide language-level delayed initialization for static values, while Rust requires explicit code to handle runtime-initialization. * Rust borrowing rules require a special pattern when implementing lazy fields. -While `lazy_static` is implemented using macros, to work-around language limitations, today it is possible to implement similar functionality without resorting to macros, as a natural combination of two features: -* lazy values -* `static` keyword +`lazy_static` is implemented using macros to work around former language limitations. Since then, various language improvements have made it possible to create runtime-initialized (lazy) objects in a `static` scope, accomplishing the same goals without macros. We can have a single canonical API for a commonly used tricky unsafe concept, so we probably should have it! 
@@ -104,7 +102,7 @@ Notable features of the API: Similarly to other interior mutability primitives, `OnceCell` comes in two flavors: * Non thread-safe `std::cell::OnceCell`. -* Thread-safe `std::sync::OnceCell`. +* Thread-safe `std::sync::OnceLock`. Here's how `OnceCell` can be used to implement lazy-initialized global data: @@ -142,37 +140,35 @@ impl Ctx { } ``` -We also provide a more convenient but less powerful `Lazy` wrapper around `OnceCell`, which allows to specify the initializing closure at creation time: +We also provide the more convenient but less powerful `LazyCell` and `LazyLock` wrappers around `OnceCell` and `OnceLock`, which allow specifying the initializing closure at creation time: ```rust -pub struct Lazy<T, F = fn() -> T> { ... } +pub struct LazyCell<T, F = fn() -> T> { ... } -impl<T, F> Lazy<T, F> { +impl<T, F: FnOnce() -> T> LazyCell<T, F> { /// Creates a new lazy value with the given initializing function. - pub const fn new(init: F) -> Lazy<T, F>; -} - -impl<T, F: FnOnce() -> T> Lazy<T, F> { + pub const fn new(init: F) -> LazyCell<T, F>; + /// Forces the evaluation of this lazy value and returns a reference to /// the result. /// /// This is equivalent to the `Deref` impl, but is explicit. 
- pub fn force(this: &Lazy<T, F>) -> &T; + pub fn force(this: &LazyCell<T, F>) -> &T; } -impl<T, F: FnOnce() -> T> Deref for Lazy<T, F> { +impl<T, F: FnOnce() -> T> Deref for LazyCell<T, F> { type Target = T; fn deref(&self) -> &T; } ``` -`Lazy` directly replaces `lazy_static!`: +`LazyLock` directly replaces `lazy_static!`: ```rust -use std::{sync::{Mutex, Lazy}, collections::HashMap}; +use std::{sync::{Mutex, LazyLock}, collections::HashMap}; -static GLOBAL_DATA: Lazy<Mutex<HashMap<i32, String>>> = Lazy::new(|| { +static GLOBAL_DATA: LazyLock<Mutex<HashMap<i32, String>>> = LazyLock::new(|| { let mut m = HashMap::new(); m.insert(13, "Spica".to_string()); m.insert(74, "Hoyten".to_string()); @@ -189,16 +185,15 @@ use std::cell::{RefCell, Lazy}; pub static FOO: Lazy<RefCell<u32>> = Lazy::new(|| RefCell::new(1)); ``` -However, `#[thread_local]` attribute is pretty far from stabilization at the moment, and due to the required special handling of destructors, it's unclear if just using `cell::Lazy` will work out. Unlike `lazy_static!`, `Lazy` can be used for locals: ```rust -use std::cell::Lazy; +use std::cell::LazyCell; fn main() { let ctx = vec![1, 2, 3]; - let thunk = Lazy::new(|| { + let thunk = LazyCell::new(|| { ctx.iter().sum::<i32>() }); assert_eq!(*thunk, 6); @@ -212,12 +207,12 @@ The proposed API is directly copied from [`once_cell`] crate. Altogether, this RFC proposes to add four types: -* `std::cell::OnceCell`, `std::cell::Lazy` -* `std::sync::OnceCell`, `std::sync::Lazy` +* `std::cell::OnceCell`, `std::cell::LazyCell` +* `std::sync::OnceLock`, `std::sync::LazyLock` -`OnceCell` is an important core primitive. -`Lazy` can be stabilized separately from `OnceCell`, or it can be omitted from the standard library altogether. -However, it provides significantly nicer ergonomics for the common use-case of static lazy values. +`OnceCell` and `OnceLock` are important primitives. +`LazyCell` and `LazyLock` can be stabilized separately from `OnceCell`, or optionally omitted from the standard library altogether. 
+However, as they provide significantly nicer ergonomics for the common use case of static lazy values, they are worth developing in tandem. Non thread-safe flavor is implemented by storing an `UnsafeCell<Option<T>>`: @@ -247,7 +242,7 @@ Non thread-safe flavor can be added to `core` as well. The thread-safe variant is implemented similarly to `std::sync::Once`. Crucially, it has support for blocking: if many threads call `get_or_init` concurrently, only one will be able to execute the closure, while all other threads will block. -For this reason, most of `std::sync::OnceCell` API can not be provided in `core`. +For this reason, most of the `std::sync::OnceLock` API cannot be provided in `core`. In the `sync` case, reliably panicking on re-entrant initialization is not trivial. For this reason, the implementation would simply deadlock, with a note that a deadlock might be elevated to a panic in the future. @@ -255,19 +250,19 @@ For this reason, the implementation would simply deadlock, with a note that a de [drawbacks]: #drawbacks * This is a moderately large addition to stdlib, there's a chance we do something wrong. - This can be mitigated by piece-wise stabilization (in particular, `Lazy` convenience types are optional) and the fact that API is tested in the crates.io ecosystem via `once_cell` crate. + This can be mitigated by piece-wise stabilization (in particular, the `LazyCell` and `LazyLock` convenience types are optional) and the fact that the API is tested in the crates.io ecosystem via the `once_cell` crate. -* The design of `Lazy` type uses default type-parameter as a work-around for the absence of type-inference of statics. +* The design of the `LazyCell` type uses a default type parameter as a workaround for the absence of type inference of statics. * We use the same name for unsync and sync types, which might be confusing. # Rationale and alternatives [rationale-and-alternatives]: #rationale-and-alternatives -## Why not `Lazy` as a primitive? +## Why not `LazyCell` as a primitive? 
-On the first look, it may seem like we don't need `OnceCell`, and should only provide `Lazy`. -The critical drawback of `Lazy` is that it's not always possible to provide the closure at creation time. +At first glance, it may seem like we don't need `OnceCell`, and should only provide `LazyCell`. +The critical drawback of `LazyCell` is that it's not always possible to provide the closure at creation time. This is important for lazy fields: @@ -361,12 +356,12 @@ fn try_set(&self, value: T) -> Result<&T, (Option<&T>, T)> ``` That is, if value is set successfully, a reference is returned. -Otherwise, ther the cell is either fully initialized, and a reference is returned as well, or the cell is being initialized, and no valid reference exist yet. +Otherwise, the cell is either fully initialized, and a reference is returned as well, or the cell is being initialized, and no valid reference exists yet. ## Support for `no_std` -The RFC proposes to add `cell::OnceCell` and `cell::Lazy` to `core`, while keeping `sync::OnceCell` and `sync::Lazy` `std`-only. -However, there's a subset of `sync::OnceCell` that can be provided in `core`: +The RFC proposes to add `cell::OnceCell` and `cell::LazyCell` to `core`, while keeping `sync::OnceLock` and `sync::LazyLock` `std`-only. +However, there's a subset of `OnceLock` that can be provided in `core`: ```rust impl<T> OnceCell<T> { @@ -464,17 +459,7 @@ However, the situation with `consume` ordering is cloudy right now: * [nobody knows what it actually means](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0371r0.html), * [but people rely on it in practice for performance](https://docs.rs/crossbeam-utils/0.7.0/crossbeam_utils/atomic/trait.AtomicConsume.html#tymethod.load_consume). -We can do one of the following: - -1. Specify and implement `acquire` ordering, -2. Specify `consume` but implement `acquire` (or hack `consume` in an implementation-defined manner) with the hope to make implementation more efficient later. -3. 
Specify and implement `acquire`, but provide additional API which can take `Ordering` as an argument. - -Option two seems the most promising: - -* it is forward compatible with specifying `acquire` later, -* for typical `OnceCell` use-cases, `consume` should be enough. - For guaranteeing side effects, `std::sync::Once` may be used instead. +Given the cost of supporting `consume` ordering and its minimal benefit, this RFC proposes to specify and implement `acquire`/`release` ordering. If at some point Rust adds a `consume` ordering to `std::sync::atomic::Ordering`, adding API methods that accept an `Ordering` can be considered. # Prior art [prior-art]: #prior-art @@ -495,7 +480,7 @@ This design doesn't always work in Rust, as closing over `self` runs afoul of th [unresolved-questions]: #unresolved-questions - What is the best naming/place for these types? -- What is the best naming scheme for methods? Is it `get_or_try_init` or `try_inert_with`? +- What is the best naming scheme for methods? Is it `get_or_try_init` or `try_insert_with`? - Is the `F = fn() -> T` hack worth it? - Which synchronization guarantee should we pick? 
From 80a09d3b325b4bfb496f32c34a817d6f01bf5067 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 30 Mar 2023 16:19:31 +0100 Subject: [PATCH 15/15] once_cell: adjust links to tracking issue --- ...0-standard-lazy-types.md => 2788-standard-lazy-types.md} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename text/{0000-standard-lazy-types.md => 2788-standard-lazy-types.md} (99%) diff --git a/text/0000-standard-lazy-types.md b/text/2788-standard-lazy-types.md similarity index 99% rename from text/0000-standard-lazy-types.md rename to text/2788-standard-lazy-types.md index 5025b0469fc..9385578040e 100644 --- a/text/0000-standard-lazy-types.md +++ b/text/2788-standard-lazy-types.md @@ -1,7 +1,7 @@ - Feature Name: `once_cell` - Start Date: 2019-10-17 -- RFC PR: [rust-lang/rfcs#0000](https://github.com/rust-lang/rfcs/pull/0000) -- Rust Issue: [rust-lang/rust#0000](https://github.com/rust-lang/rust/issues/0000) +- RFC PR: [rust-lang/rfcs#2788](https://github.com/rust-lang/rfcs/pull/2788) +- Rust Issue: [rust-lang/rust#74465](https://github.com/rust-lang/rust/issues/74465) # Summary [summary]: #summary @@ -148,7 +148,7 @@ pub struct LazyCell<T, F = fn() -> T> { ... } impl<T, F: FnOnce() -> T> LazyCell<T, F> { /// Creates a new lazy value with the given initializing function. pub const fn new(init: F) -> LazyCell<T, F>; - + /// Forces the evaluation of this lazy value and returns a reference to /// the result. ///