Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[stdlib] Switch to hasher based hashing #3701

Draft
wants to merge 1 commit into
base: nightly
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5,245 changes: 713 additions & 4,532 deletions magic.lock

Large diffs are not rendered by default.

164 changes: 81 additions & 83 deletions stdlib/benchmarks/hashlib/bench_hash.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,15 @@
# NOTE: to test changes on the current branch using run-benchmarks.sh, remove
# the -t flag. Remember to restore the flag before pushing any code.

from hashlib._ahash import (
MULTIPLE,
ROT,
U128,
U256,
from benchmark import Bench, BenchConfig, Bencher, BenchId
from bit import byte_swap, rotate_bits_left
from memory import UnsafePointer
from hashlib.ahash import (
AHasher,
_folded_multiply,
_read_small,
)
from hashlib._hasher import _hash_with_hasher
from hashlib.hash import hash as old_hash

from benchmark import Bench, BenchConfig, Bencher, BenchId
from bit import byte_swap, rotate_bits_left
from memory import UnsafePointer
from hashlib.fnv1a import Fnv1a

# Source: https://www.101languages.net/arabic/most-common-arabic-words/
alias words_ar = """
Expand Down Expand Up @@ -599,50 +593,25 @@ fn gen_word_pairs[words: String = words_en]() -> List[String]:
# Benchmarks
# ===-----------------------------------------------------------------------===#
@parameter
fn bench_small_keys[s: String](mut b: Bencher) raises:
var words = gen_word_pairs[s]()

@always_inline
@parameter
fn call_fn():
for w in words:
var h = old_hash(w[])
benchmark.keep(h)

b.iter[call_fn]()


@parameter
fn bench_small_keys_new_hash_function[s: String](mut b: Bencher) raises:
fn bench_small_keys[s: String, H: Hasher](mut b: Bencher) raises:
var words = gen_word_pairs[s]()

@always_inline
@parameter
fn call_fn():
for w in words:
var h = _hash_with_hasher(w[].unsafe_ptr(), w[].byte_length())
var h = hash[HasherType=H](w[])
benchmark.keep(h)

b.iter[call_fn]()


@parameter
fn bench_long_key[s: String](mut b: Bencher) raises:
fn bench_long_key[s: String, H: Hasher](mut b: Bencher) raises:
@always_inline
@parameter
fn call_fn():
var h = old_hash(s)
benchmark.keep(h)

b.iter[call_fn]()


@parameter
fn bench_long_key_new_hash_function[s: String](mut b: Bencher) raises:
@always_inline
@parameter
fn call_fn():
var h = _hash_with_hasher(s.unsafe_ptr(), s.byte_length())
var h = hash[HasherType=H](s)
benchmark.keep(h)

b.iter[call_fn]()
Expand All @@ -652,64 +621,93 @@ fn bench_long_key_new_hash_function[s: String](mut b: Bencher) raises:
# Benchmark Main
# ===-----------------------------------------------------------------------===#
def main():
alias ahasher = AHasher[SIMD[DType.uint64, 4](0)]
var m = Bench(BenchConfig(num_repetitions=1))
m.bench_function[bench_small_keys[words_ar]](BenchId("bench_small_keys_ar"))
m.bench_function[bench_small_keys_new_hash_function[words_ar]](
BenchId("bench_small_keys_new_ar")
m.bench_function[bench_small_keys[words_ar, ahasher]](
BenchId("bench_small_keys_ar_ahash")
)
m.bench_function[bench_small_keys[words_el]](BenchId("bench_small_keys_el"))
m.bench_function[bench_small_keys_new_hash_function[words_el]](
BenchId("bench_small_keys_new_el")
m.bench_function[bench_small_keys[words_el, ahasher]](
BenchId("bench_small_keys_el_ahash")
)
m.bench_function[bench_small_keys[words_en]](BenchId("bench_small_keys_en"))
m.bench_function[bench_small_keys_new_hash_function[words_en]](
BenchId("bench_small_keys_new_en")
m.bench_function[bench_small_keys[words_en, ahasher]](
BenchId("bench_small_keys_en_ahash")
)
m.bench_function[bench_small_keys[words_he]](BenchId("bench_small_keys_he"))
m.bench_function[bench_small_keys_new_hash_function[words_he]](
BenchId("bench_small_keys_new_he")
m.bench_function[bench_small_keys[words_he, ahasher]](
BenchId("bench_small_keys_he_ahash")
)
m.bench_function[bench_small_keys[words_lv]](BenchId("bench_small_keys_lv"))
m.bench_function[bench_small_keys_new_hash_function[words_lv]](
BenchId("bench_small_keys_new_lv")
m.bench_function[bench_small_keys[words_lv, ahasher]](
BenchId("bench_small_keys_lv_ahash")
)
m.bench_function[bench_small_keys[words_pl]](BenchId("bench_small_keys_pl"))
m.bench_function[bench_small_keys_new_hash_function[words_pl]](
BenchId("bench_small_keys_new_pl")
m.bench_function[bench_small_keys[words_pl, ahasher]](
BenchId("bench_small_keys_pl_ahash")
)
m.bench_function[bench_small_keys[words_ru]](BenchId("bench_small_keys_ru"))
m.bench_function[bench_small_keys_new_hash_function[words_ru]](
BenchId("bench_small_keys_new_ru")
m.bench_function[bench_small_keys[words_ru, ahasher]](
BenchId("bench_small_keys_ru_ahash")
)

m.bench_function[bench_long_key[words_ar]](BenchId("bench_long_key_ar"))
m.bench_function[bench_long_key_new_hash_function[words_ar]](
BenchId("bench_long_key_new_ar")
m.bench_function[bench_small_keys[words_ar, Fnv1a]](
BenchId("bench_small_keys_ar_fnv1a")
)
m.bench_function[bench_small_keys[words_el, Fnv1a]](
BenchId("bench_small_keys_el_fnv1a")
)
m.bench_function[bench_small_keys[words_en, Fnv1a]](
BenchId("bench_small_keys_en_fnv1a")
)
m.bench_function[bench_small_keys[words_he, Fnv1a]](
BenchId("bench_small_keys_he_fnv1a")
)
m.bench_function[bench_small_keys[words_lv, Fnv1a]](
BenchId("bench_small_keys_lv_fnv1a")
)
m.bench_function[bench_small_keys[words_pl, Fnv1a]](
BenchId("bench_small_keys_pl_fnv1a")
)
m.bench_function[bench_long_key[words_el]](BenchId("bench_long_key_el"))
m.bench_function[bench_long_key_new_hash_function[words_el]](
BenchId("bench_long_key_new_el")
m.bench_function[bench_small_keys[words_ru, Fnv1a]](
BenchId("bench_small_keys_ru_fnv1a")
)

m.bench_function[bench_long_key[words_ar, ahasher]](
BenchId("bench_long_key_ar_ahash")
)
m.bench_function[bench_long_key[words_el, ahasher]](
BenchId("bench_long_key_el_ahash")
)
m.bench_function[bench_long_key[words_en, ahasher]](
BenchId("bench_long_key_keys_en_ahash")
)
m.bench_function[bench_long_key[words_he, ahasher]](
BenchId("bench_long_key_he_ahash")
)
m.bench_function[bench_long_key[words_lv, ahasher]](
BenchId("bench_long_key_lv_ahash")
)
m.bench_function[bench_long_key[words_pl, ahasher]](
BenchId("bench_long_key_pl_ahash")
)
m.bench_function[bench_long_key[words_ru, ahasher]](
BenchId("bench_long_key_ru_ahash")
)

m.bench_function[bench_long_key[words_ar, Fnv1a]](
BenchId("bench_long_key_ar_fnv1a")
)
m.bench_function[bench_long_key[words_en]](
BenchId("bench_long_key_keys_en")
m.bench_function[bench_long_key[words_el, Fnv1a]](
BenchId("bench_long_key_el_fnv1a")
)
m.bench_function[bench_long_key_new_hash_function[words_en]](
BenchId("bench_long_key_new_en")
m.bench_function[bench_long_key[words_en, Fnv1a]](
BenchId("bench_long_key_keys_en_fnv1a")
)
m.bench_function[bench_long_key[words_he]](BenchId("bench_long_key_he"))
m.bench_function[bench_long_key_new_hash_function[words_he]](
BenchId("bench_long_key_new_he")
m.bench_function[bench_long_key[words_he, Fnv1a]](
BenchId("bench_long_key_he_fnv1a")
)
m.bench_function[bench_long_key[words_lv]](BenchId("bench_long_key_lv"))
m.bench_function[bench_long_key_new_hash_function[words_lv]](
BenchId("bench_long_key_new_lv")
m.bench_function[bench_long_key[words_lv, Fnv1a]](
BenchId("bench_long_key_lv_fnv1a")
)
m.bench_function[bench_long_key[words_pl]](BenchId("bench_long_key_pl"))
m.bench_function[bench_long_key_new_hash_function[words_pl]](
BenchId("bench_long_key_new_pl")
m.bench_function[bench_long_key[words_pl, Fnv1a]](
BenchId("bench_long_key_pl_fnv1a")
)
m.bench_function[bench_long_key[words_ru]](BenchId("bench_long_key_ru"))
m.bench_function[bench_long_key_new_hash_function[words_ru]](
BenchId("bench_long_key_new_ru")
m.bench_function[bench_long_key[words_ru, Fnv1a]](
BenchId("bench_long_key_ru_fnv1a")
)
m.dump_report()
12 changes: 1 addition & 11 deletions stdlib/src/builtin/dtype.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ These are Mojo built-ins, so you don't need to import them.
"""

from collections import KeyElement
from hashlib._hasher import _HashableWithHasher, _Hasher
from sys import bitwidthof, os_is_windows, sizeof

alias _mIsSigned = UInt8(1)
Expand All @@ -33,7 +32,6 @@ struct DType(
Representable,
KeyElement,
CollectionElementNew,
_HashableWithHasher,
):
"""Represents DType and provides methods for working with it."""

Expand Down Expand Up @@ -371,15 +369,7 @@ struct DType(
self._as_i8(), rhs._as_i8()
)

fn __hash__(self) -> UInt:
"""Return a 64-bit hash for this `DType` value.

Returns:
A 64-bit integer hash of this `DType` value.
"""
return hash(UInt8(self._as_i8()))

fn __hash__[H: _Hasher](self, mut hasher: H):
fn __hash__[H: Hasher](self, mut hasher: H):
"""Updates hasher with this `DType` value.

Parameters:
Expand Down
16 changes: 1 addition & 15 deletions stdlib/src/builtin/int.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ from collections.string import (
_calc_initial_buffer_size_int32,
_calc_initial_buffer_size_int64,
)
from hashlib._hasher import _HashableWithHasher, _Hasher
from hashlib.hash import _hash_simd
from math import Ceilable, CeilDivable, Floorable, Truncable
from sys import bitwidthof

Expand Down Expand Up @@ -306,7 +304,6 @@ struct Int(
KeyElement,
Roundable,
IntLike,
_HashableWithHasher,
):
"""This type represents an integer value."""

Expand Down Expand Up @@ -1130,18 +1127,7 @@ struct Int(
"""
return str(self)

fn __hash__(self) -> UInt:
"""Hash the int using builtin hash.

Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
# TODO(MOCO-636): switch to DType.index
return _hash_simd(Scalar[DType.int64](self))

fn __hash__[H: _Hasher](self, mut hasher: H):
fn __hash__[H: Hasher](self, mut hasher: H):
"""Updates hasher with this int value.

Parameters:
Expand Down
15 changes: 1 addition & 14 deletions stdlib/src/builtin/simd.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ from collections.string import (
_calc_format_buffer_size,
_calc_initial_buffer_size,
)
from hashlib._hasher import _HashableWithHasher, _Hasher
from hashlib.hash import _hash_simd
from math import Ceilable, CeilDivable, Floorable, Truncable
from math.math import _call_ptx_intrinsic
from os import abort
Expand Down Expand Up @@ -242,7 +240,6 @@ struct SIMD[type: DType, size: Int](
Floorable,
Writable,
Hashable,
_HashableWithHasher,
Intable,
IntLike,
Representable,
Expand Down Expand Up @@ -1675,17 +1672,7 @@ struct SIMD[type: DType, size: Int](
# TODO: see how can we implement this.
return llvm_intrinsic["llvm.round", Self, has_side_effect=False](self)

fn __hash__(self) -> UInt:
"""Hash the value using builtin hash.

Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
return _hash_simd(self)

fn __hash__[H: _Hasher](self, mut hasher: H):
fn __hash__[H: Hasher](self, mut hasher: H):
"""Updates hasher with this SIMD value.

Parameters:
Expand Down
14 changes: 1 addition & 13 deletions stdlib/src/builtin/string_literal.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ These are Mojo built-ins, so you don't need to import them.
"""

from collections import List
from hashlib._hasher import _HashableWithHasher, _Hasher
from sys.ffi import c_char

from memory import UnsafePointer, memcpy, Span
Expand Down Expand Up @@ -45,7 +44,6 @@ struct StringLiteral(
Stringable,
FloatableRaising,
BytesCollectionElement,
_HashableWithHasher,
):
"""This type represents a string literal.

Expand Down Expand Up @@ -405,17 +403,7 @@ struct StringLiteral(
"""
return self.__str__().__repr__()

fn __hash__(self) -> UInt:
"""Hash the underlying buffer using builtin hash.

Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
return hash(self.unsafe_ptr(), len(self))

fn __hash__[H: _Hasher](self, mut hasher: H):
fn __hash__[H: Hasher](self, mut hasher: H):
"""Updates hasher with the underlying bytes.

Parameters:
Expand Down
17 changes: 2 additions & 15 deletions stdlib/src/builtin/uint.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
These are Mojo built-ins, so you don't need to import them.
"""

from hashlib._hasher import _HashableWithHasher, _Hasher
from hashlib.hash import _hash_simd
from sys import bitwidthof

from documentation import doc_private
Expand All @@ -27,7 +25,7 @@ from utils._visualizers import lldb_formatter_wrapping_type
@lldb_formatter_wrapping_type
@value
@register_passable("trivial")
struct UInt(IntLike, _HashableWithHasher):
struct UInt(IntLike, Hashable):
"""This type represents an unsigned integer.

An unsigned integer represents a non-negative integral number.
Expand Down Expand Up @@ -157,18 +155,7 @@ struct UInt(IntLike, _HashableWithHasher):
"""
return String.write("UInt(", str(self), ")")

fn __hash__(self) -> UInt:
"""Hash the UInt using builtin hash.

Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
# TODO(MOCO-636): switch to DType.index
return _hash_simd(Scalar[DType.uint64](self))

fn __hash__[H: _Hasher](self, mut hasher: H):
fn __hash__[H: Hasher](self, mut hasher: H):
"""Updates hasher with this uint value.

Parameters:
Expand Down
Loading
Loading