From b0171fa4728307f0949e9d8f4df2840133086d7f Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Thu, 19 Dec 2024 14:43:31 -0700 Subject: [PATCH] feat: add datadog-thin-str crate It doesn't yet update profiling to use it. Splitting into multiple PRs for it to be easier to review. --- thin-str/Cargo.toml | 24 +++ thin-str/LICENSE | 202 +++++++++++++++++++++ thin-str/src/lib.rs | 145 +++++++++++++++ thin-str/src/thin_str.rs | 159 +++++++++++++++++ thin-str/src/thin_string.rs | 347 ++++++++++++++++++++++++++++++++++++ 5 files changed, 877 insertions(+) create mode 100644 thin-str/Cargo.toml create mode 100644 thin-str/LICENSE create mode 100644 thin-str/src/lib.rs create mode 100644 thin-str/src/thin_str.rs create mode 100644 thin-str/src/thin_string.rs diff --git a/thin-str/Cargo.toml b/thin-str/Cargo.toml new file mode 100644 index 0000000000..c0b57f151b --- /dev/null +++ b/thin-str/Cargo.toml @@ -0,0 +1,24 @@ +# Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "datadog-thin-str" +version = "1.0.0" +edition = "2021" +license = "Apache-2.0" +rust-version = "1.71" + +[lib] +path = "src/lib.rs" +bench = false + +[features] +default = [] +std = [] + +[dependencies] +allocator-api2 = "0.2" +tagged-pointer = { version = "0.2", default-features = false } + +[dev-dependencies] +naughty-strings = "0.2" diff --git a/thin-str/LICENSE b/thin-str/LICENSE new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/thin-str/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/thin-str/src/lib.rs b/thin-str/src/lib.rs new file mode 100644 index 0000000000..f3c0c1481f --- /dev/null +++ b/thin-str/src/lib.rs @@ -0,0 +1,145 @@ +// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +#![no_std] + +mod thin_str; +mod thin_string; + +use core::ops::Deref; +use core::{mem, ptr}; + +pub use crate::thin_str::*; +pub use crate::thin_string::*; + +/// [Storage] is the data layout that [ThinStr] and [ThinString] point at. +/// The data is immutable after construction to avoid accidentally creating +/// mutable references. +#[repr(C)] +pub struct Storage { + /// The header stores the number of bytes used in the string. + header: ThinHeader, + /// The bytes of the strings are stored here. Immutable after creation. 
+ data: str, +} + +/// Represents a [Storage] with a known-at-compile-time len for the str. +#[repr(C)] +#[derive(Clone, Copy)] +pub struct ConstStorage { + /// The header stores the number of bytes used in the string. + header: ThinHeader, + /// The bytes of the string are stored here, it must be a valid str. + /// Immutable after creation. + data: [u8; N], +} + +/// The alignment is so that tagged pointers can use the least significant bit +/// for storing other things if they wish. However, it's intentionally minimal +/// so that strings can be packed tightly into arenas. Wasting a single +/// byte when there's a string with an odd len is fine--C strings waste a byte +/// on the null terminator all the time. +#[repr(C, align(2))] +#[derive(Clone, Copy)] +pub struct ThinHeader { + /// The number of valid bytes which follow this header. + /// Create with [usize::to_ne_bytes] and restore it with + /// [usize::from_ne_bytes]. + size: [u8; mem::size_of::()], +} + +/// # Safety +/// The strings are immutable and can therefore be sent between threads. +unsafe impl Send for ThinHeader {} + +/// # Safety +/// The strings are immutable and can therefore be shared amongst threads. +unsafe impl Sync for ThinHeader {} + +/// ConstStorage representing the empty string "". +pub static EMPTY: ConstStorage<0> = ConstStorage::from_str(""); + +impl Storage { + /// Create a Storage reference from a non-null pointer. + /// # Safety + /// - The pointer to the ThinHeader needs to actually be a thin pointer to + /// a valid Storage object, which includes that it is properly aligned + /// and initialized. + /// - No mutable references should exist to the storage. + /// - The lifetime needs to ensure that the storage lives at least this + /// long. 
+ const unsafe fn from_header<'a>(header_ptr: ptr::NonNull) -> &'a Storage { + let obj = header_ptr.cast::().as_ptr(); + let len = { + // SAFETY: based on this function's own safety requirements, this + // should already be 1) valid for reads, 2) properly aligned, and + // 3) properly initialized. + let header = unsafe { header_ptr.as_ptr().read() }; + usize::from_ne_bytes(header.size) + }; + + // Weird trick to get a fat pointer. The metadata from the slice will + // get used to create the metadata in the fat Storage pointer. This + // weird cast is documented: + // https://github.com/rust-lang/reference/blob/d6d24b9b548f62a50461bac85ce278d80437ab05/src/expressions/operator-expr.md?plain=1#L555 + let fat = ptr::slice_from_raw_parts(obj, len) as *const Storage; + + // SAFETY: based on this function's own safety requirements, this is + // safe to turn into a reference. + unsafe { &*fat } + } +} + +impl<'a> From<&'a Storage> for &'a str { + fn from(storage: &'a Storage) -> &'a str { + &storage.data + } +} + +impl Deref for Storage { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.data + } +} + +impl ConstStorage { + /// Create a [ConstStorage] from the provided string. + /// # Panics + /// Panics if the length of the string does not match the provided N. + pub const fn from_str(str: &str) -> ConstStorage { + if str.len() != N { + panic!("string length mismatch"); + } + let size = N.to_ne_bytes(); + let header = ThinHeader { size }; + // SAFETY: the bytes of the str are initialized, the length of the + // string is guarded to match the length of the array, the str is + // valid for reads, and strings are always properly aligned since + // their alignment is 1. + let data = unsafe { ptr::read(str.as_ptr().cast::<[u8; N]>()) }; + ConstStorage:: { header, data } + } + + /// Get a [Storage] reference to the same data held by this [ConstStorage]. + /// This method exists because From/Into are not const. 
+ pub const fn as_storage(&self) -> &Storage { + let header_ptr = { + let ptr = self as *const Self as *const ThinHeader; + // SAFETY: Storage is immutable, and self means the data is alive + // and not null. + unsafe { ptr::NonNull::new_unchecked(ptr.cast_mut()) } + }; + // SAFETY: the layouts of Storage and ConstStorage are compatible, + // ConstStorage only holds valid strs, and the lifetime is valid. + unsafe { Storage::from_header(header_ptr) } + } + + /// Get a `&str` to the data held by this [ConstStorage]. + /// This method exists because From/Into are not const. + pub const fn as_ref(&self) -> &str { + // SAFETY: ConstStorage always holds valid strings. + unsafe { core::str::from_utf8_unchecked(&self.data) } + } +} diff --git a/thin-str/src/thin_str.rs b/thin-str/src/thin_str.rs new file mode 100644 index 0000000000..6eacca7b4f --- /dev/null +++ b/thin-str/src/thin_str.rs @@ -0,0 +1,159 @@ +// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use super::{ConstStorage, Storage, ThinHeader, EMPTY}; +use core::ops::Deref; +use core::{hash, marker, ptr}; + +/// A borrowed string which uses a "thin" pointer, so it uses fewer bytes than +/// a &str does. This requires the size to be stored elsewhere, so there are +/// trade-offs. +#[derive(Clone, Copy, Debug)] +pub struct ThinStr<'a> { + /// Points to [Storage], except that we don't want a fat pointer (fat + /// because of the DST), so we store a pointer to the header instead. + ptr: ptr::NonNull, + + /// Since [ThinStr] acts like a reference type but doesn't hold one, we + /// tell the compiler this info with a marker. This doesn't add any size + /// to the struct. + _marker: marker::PhantomData<&'a [u8]>, +} + +impl ThinStr<'_> { + /// Creates a [ThinStr] that represents the empty string "". 
+ pub fn new() -> Self { + Self::from_const_storage(&EMPTY) + } + + /// Creates a [ThinStr] from the provided pointer, but it must be a + /// pointer to a valid [Storage] object. Since [Storage] objects are + /// immutable, we can always make new immutable references to the same + /// storage object without running afoul of aliasing rules. However, + /// care does need to be taken to make sure the lifetime is correctly + /// bound. + /// # Safety + /// The thin pointer should point to a valid [Storage] object. The lifetime + /// of the created [ThinStr] must not outlive the data it refers to. + pub const unsafe fn from_header(ptr: ptr::NonNull) -> Self { + let _marker = marker::PhantomData; + Self { ptr, _marker } + } + + /// Creates a [ThinStr] from a &[ConstStorage]. + /// This method exists because From/Into are not const. + pub const fn from_const_storage(const_storage: &ConstStorage) -> Self { + let ptr = { + let obj = const_storage.as_storage() as *const _ as *const ThinHeader; + // SAFETY: this is just NonNull::from(&ConstStorage) with casts, + // it's just that From::from isn't a const fn. + unsafe { ptr::NonNull::new_unchecked(obj.cast_mut()) } + }; + // SAFETY: ConstStorage objects are valid Storage objects. + unsafe { Self::from_header(ptr) } + } + + /// Gets the header pointer for this string. Note that it is safe to get + /// the pointer, but unsafe to do much with it other than to pass it back + /// to [Self::from_header]. + pub const fn header_ptr(&self) -> ptr::NonNull { + self.ptr + } +} + +impl From for ThinHeader { + fn from(value: usize) -> Self { + let size = value.to_ne_bytes(); + Self { size } + } +} + +impl Default for ThinStr<'_> { + fn default() -> Self { + ThinStr::new() + } +} + +impl<'a> From> for &'a Storage { + fn from(thin_str: ThinStr<'a>) -> &'a Storage { + // SAFETY: the lifetime is accurate, and ThinStr points to a valid + // Storage object, and no mutable references ever exist to Storage + // (it is immutable). 
+ unsafe { Storage::from_header(thin_str.ptr.cast::()) } + } +} + +impl<'a> From<&ThinStr<'a>> for &'a Storage { + fn from(thin_str: &ThinStr<'a>) -> &'a Storage { + <&'a Storage>::from(*thin_str) + } +} + +impl Deref for ThinStr<'_> { + type Target = str; + + fn deref(&self) -> &Self::Target { + let storage: &Storage = self.into(); + storage.deref() + } +} + +/// # Safety +/// Static strings are immutable and can therefore be sent between threads. +unsafe impl Send for ThinStr<'static> {} + +/// # Safety +/// Static strings are immutable and can therefore be shared amongst threads. +unsafe impl Sync for ThinStr<'static> {} + +impl PartialEq for ThinStr<'_> { + fn eq(&self, other: &Self) -> bool { + self.deref() == other.deref() + } +} + +impl Eq for ThinStr<'_> {} + +impl hash::Hash for ThinStr<'_> { + fn hash(&self, state: &mut H) { + self.deref().hash(state) + } +} + +impl AsRef for ThinStr<'_> { + fn as_ref(&self) -> &str { + self.deref() + } +} + +// This allows using &str as a key when looking up ThinStr keys. +impl core::borrow::Borrow for ThinStr<'_> { + fn borrow(&self) -> &str { + self.deref() + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_empty_str() { + let empty = ThinStr::new(); + assert_eq!(empty.deref(), ""); + + let storage: &Storage = empty.into(); + assert_eq!(storage.deref(), ""); + } + + #[test] + fn test_non_empty_str() { + const PHP_OPEN_STR: &str = " = ConstStorage::from_str(PHP_OPEN_STR); + let thin = ThinStr::from_const_storage(&PHP_OPEN); + assert_eq!(thin.deref(), PHP_OPEN_STR); + + let storage: &Storage = thin.into(); + assert_eq!(storage.deref(), PHP_OPEN_STR); + } +} diff --git a/thin-str/src/thin_string.rs b/thin-str/src/thin_string.rs new file mode 100644 index 0000000000..e042478180 --- /dev/null +++ b/thin-str/src/thin_string.rs @@ -0,0 +1,347 @@ +// Copyright 2024-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use super::{ConstStorage, Storage, ThinHeader, ThinStr, EMPTY}; +use allocator_api2::alloc::{AllocError, Allocator, Global, Layout}; +use core::ops::Deref; +use core::{fmt, hash, mem, ptr}; +use tagged_pointer::TaggedPtr; + +/// An owned string which uses a "thin" pointer, so it uses fewer bytes than +/// a [Box]'d str. It cannot be resized unless it reallocates, but currently +/// there are no APIs for this. Currently, there are no APIs for mutable +/// operations on the string data at all. +#[derive(Debug)] +pub struct ThinString { + /// The alignment of [ThinHeader] means that [TaggedPtr] can use the least + /// significant bit to store whether the string is owned or borrowed. Use + /// the [OWNED] and [BORROWED] constants for this. + tagged_ptr: TaggedPtr, + allocator: A, +} + +/// Tag used to represent an owned string. +const OWNED: usize = 0; + +/// Tag used to represent a borrowed string (lives in static storage). +const BORROWED: usize = 1; + +impl Clone for ThinString { + fn clone(&self) -> Self { + Self::from_str_in(self.deref(), self.allocator.clone()) + } +} + +/// # Safety +/// The strings are immutable and can therefore be sent between threads, if +/// the allocator allows it. +unsafe impl Send for ThinString {} + +/// # Safety +/// The strings are immutable and can therefore be shared amongst threads, if +/// the allocator allows it (probably, this requires static storage). +unsafe impl Sync for ThinString {} + +impl ThinString { + /// Creates the empty [ThinString]. + /// This will not allocate. + #[inline] + pub fn new_in(allocator: A) -> Self { + Self::from_const_storage_in(&EMPTY, allocator) + } + + /// Create a ThinString from &static [ConstStorage]. Note that there's not + /// _quite_ enough support to make this a const fn. + /// This will not allocate. 
+ #[inline] + pub fn from_const_storage_in( + const_storage: &'static ConstStorage, + allocator: A, + ) -> ThinString { + let header_ptr = ptr::NonNull::from(const_storage).cast::(); + ThinString { + tagged_ptr: TaggedPtr::new(header_ptr, BORROWED), + allocator, + } + } + + /// Create a [ThinString] from the given &str. + /// This will allocate in the given allocator. + /// # Panics + /// This panics if allocation fails. + #[inline] + pub fn from_str_in(string: &str, allocator: A) -> Self { + Self::try_from_str_in(string, allocator).unwrap() + } + + /// Tries to create a [ThinString] from the given &str. Fails only if + /// allocation fails. + pub fn try_from_str_in(string: &str, allocator: A) -> Result { + let header = Layout::new::(); + let data = Layout::for_value(string); + let Ok((layout, _offset)) = header.extend(data) else { + return Err(AllocError); + }; + // Padding is important here, since the str could be an odd number of + // bytes, and `Layout::extend` doesn't automatically pad. + let layout = layout.pad_to_align(); + + // Sanity check some things to defend against refactoring. + debug_assert_eq!(_offset, mem::size_of::()); + debug_assert_eq!(_offset, mem::size_of::()); + + let obj = allocator.allocate(layout)?; + let header = ThinHeader::from(string.len()); + let header_ptr = obj.cast::(); + + // SAFETY: the memory is valid for writes and has a layout suitable + // for a header. Drop isn't a concern since raw bytes don't need + // to be dropped. + unsafe { header_ptr.as_ptr().write(header) }; + + // SAFETY: the offset is at the correct place from the base pointer, + // and again, the memory is valid for writes and Drop is irrelevant. + let data = unsafe { obj.cast::().as_ptr().add(mem::size_of::()) }; + + // SAFETY: a new, non-zero sized allocated object must not overlap + // with existing memory by definition. Both regions are valid for + // a read or write respectively of the specified length, and u8 is + // always aligned. 
+ unsafe { ptr::copy_nonoverlapping(string.as_ptr(), data, string.len()) }; + + // Double check alignment in case refactoring breaks assumptions. + debug_assert_eq!(mem::align_of::(), mem::align_of::()); + + let tagged_ptr = TaggedPtr::new(header_ptr, OWNED); + + Ok(ThinString { + tagged_ptr, + allocator, + }) + } + + /// Creates a [ThinStr] that borrows from the [ThinString]. + #[inline] + pub fn as_thin_str(&self) -> ThinStr { + let obj = self.tagged_ptr.ptr(); + // SAFETY: ThinString points to a valid header, no mutable references + // ever exist for ThinString, and the lifetime ensures the storage + // lives long enough. + unsafe { ThinStr::from_header(obj) } + } + + /// Create a &[Storage] that refers to the same data as this [ThinString]. + #[inline] + pub fn as_storage(&self) -> &Storage { + let obj = self.tagged_ptr.ptr(); + // SAFETY: ThinString points to a valid header, no mutable references + // ever exist for ThinString, and the lifetime ensures the storage + // lives long enough. 
+ unsafe { Storage::from_header(obj) } + } +} + +impl Default for ThinString { + fn default() -> ThinString { + Self::new_in(A::default()) + } +} + +impl PartialEq for ThinString { + fn eq(&self, other: &Self) -> bool { + self.deref() == other.deref() + } +} + +impl Eq for ThinString {} + +impl hash::Hash for ThinString { + fn hash(&self, state: &mut H) { + self.deref().hash(state) + } +} + +impl Drop for ThinString { + fn drop(&mut self) { + if self.tagged_ptr.tag() != OWNED { + return; + } + + let storage = <&Storage>::from(&*self); + let layout = Layout::for_value(storage); + + unsafe { + self.allocator + .deallocate(self.tagged_ptr.ptr().cast(), layout) + }; + } +} + +impl<'a, A: Allocator> From<&'a ThinString> for &'a Storage { + fn from(thin_string: &'a ThinString) -> Self { + thin_string.as_storage() + } +} + +impl From<&'static ConstStorage> for ThinString { + fn from(const_storage: &'static ConstStorage) -> Self { + Self::from_const_storage_in(const_storage, Global) + } +} + +impl<'a, A: Allocator> From<&'a ThinString> for &'a str { + fn from(thin_string: &'a ThinString) -> Self { + let storage: &Storage = thin_string.into(); + storage.deref() + } +} + +impl Deref for ThinString { + type Target = str; + + fn deref(&self) -> &Self::Target { + let storage: &Storage = self.into(); + storage.deref() + } +} + +impl AsRef for ThinString { + fn as_ref(&self) -> &str { + self.deref() + } +} + +impl fmt::Display for ThinString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.deref().fmt(f) + } +} + +impl From<&str> for ThinString { + fn from(string: &str) -> Self { + Self::from_str_in(string, Global) + } +} + +#[cfg(feature = "std")] +extern crate std; + +#[cfg(feature = "std")] +mod ext { + use super::*; + use std::borrow::Cow; + use std::string::String; + + impl From> for Cow<'static, str> { + fn from(thin_string: ThinString) -> Self { + if thin_string.tagged_ptr.tag() == OWNED { + let string = String::from(thin_string.as_ref()); + 
Cow::Owned(string) + } else { + // SAFETY: if the string is borrowed, it lives in static memory. + let str = unsafe { mem::transmute::<&str, &str>(thin_string.as_ref()) }; + Cow::Borrowed(str) + } + } + } + + impl From for ThinString { + fn from(string: String) -> Self { + ThinString::from_str_in(string.as_str(), Global) + } + } + + #[cfg(test)] + mod tests { + use super::*; + #[test] + fn test_from_string() { + let string = String::from("hello world"); + let thin_string = ThinString::from(string); + assert_eq!(thin_string.deref(), "hello world"); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use allocator_api2::alloc::Global; + + #[test] + fn test_thin_strings() { + let strs = [ + "", + "woof", + "datadog", + "She sells sea shells by the sea shore.", + "The quick brown fox jumps over the lazy dog.", + r#" +I can't remember +The best haiku in the world: +This is a tribute. +"#, + ]; + + for str in strs { + let thin_string = ThinString::try_from_str_in(str, Global).unwrap(); + assert_eq!(thin_string.deref(), str); + let thin_str = thin_string.as_thin_str(); + assert_eq!(thin_str.deref(), str); + } + } + + #[test] + fn test_naughty_strings() { + // Naughty strings are still valid UTF-8, let's make sure we don't + // screw up their len and such. 
+ for str in naughty_strings::BLNS { + let str = *str; // unwrap the && + let thin_string = ThinString::try_from_str_in(str, Global).unwrap(); + assert_eq!(thin_string.deref(), str); + let thin_str = thin_string.as_thin_str(); + assert_eq!(thin_str.deref(), str); + } + } + + #[test] + fn test_const_storage() { + static DATADOG: ConstStorage<7> = ConstStorage::from_str("datadog"); + + let thin_string = ThinString::from_const_storage_in(&DATADOG, Global); + assert_eq!(thin_string.deref(), "datadog"); + let thin_str = thin_string.as_thin_str(); + assert_eq!(thin_str.deref(), "datadog"); + } + + #[test] + fn test_from_str() { + let str = "hello world"; + let thin_string = ThinString::from(str); + assert_eq!(thin_string.deref(), "hello world"); + } + + /// This test mimics something we do in the PHP profiler, since pointers + /// to these strings get stored in cache slots. + #[test] + fn test_round_tripping_to_usize() { + let datadog = "See inside any stack, any app, at any scale, anywhere."; + let string = ThinString::from(datadog); + + let bits = { + let thin_str = string.as_thin_str(); + let non_null = thin_str.header_ptr(); + non_null.as_ptr() as usize + }; + + let restored = { + let non_null = unsafe { ptr::NonNull::new_unchecked(bits as *mut ThinHeader) }; + unsafe { ThinStr::from_header(non_null) } + }; + + assert_eq!(datadog, restored.deref()); + + // dropping explicitly to ensure the string is still valid and alive. + drop(string); + } +}