diff --git a/build.gradle.kts b/build.gradle.kts index b74d40e..1accf90 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -6,7 +6,7 @@ plugins { } group = "org.veupathdb.lib" -version = "1.0.2" +version = "1.1.0" repositories { mavenCentral() diff --git a/readme.adoc b/readme.adoc index 99d9bda..dfc115d 100644 --- a/readme.adoc +++ b/readme.adoc @@ -1,5 +1,90 @@ = HashID -Provides a type for a 128-bit hash based identifier. +Provides an implementation of a concrete type for a collidable 128-bit digest +based identifier that is safer and more robust than passing around a raw hash +string. + +== Reasoning + +link:src/main/kotlin/org/veupathdb/lib/hash_id/HashID.kt[`HashID`] guarantees +that its value is a valid MD5 hash value and allows consuming code to rely on +that guarantee and make safe assumptions and calls without needing to verify the +contents of the ID. + +This effectively eliminates the class of bugs that could arise from a missed +validation on a raw input string or byte array. + +In addition to the standard constructors taking a hex string hash, or a raw byte +array, `HashID` provides convenience methods for generating new `HashID` +instances by generating an MD5 hash of a given arbitrary input, including from +`InputStream`. + + +== Usage + +[source, kotlin] +---- +dependencies { + implementation("org.veupathdb.lib:hash-id:1.1.0") +} +---- + + +=== Getting a New `HashID` + +.From a raw value +[source, java] +---- +// Create a HashID by MD5 hashing a string value. +var myID_1 = HashID.ofMD5("my raw value"); + +// Create a HashID by MD5 hashing the contents of an InputStream. +var myID_2 = HashID.ofMD5(someInputStream); + +// Create a HashID by MD5 hashing the stringified form of an arbitrary object. +var myID_3 = HashID.ofMD5(someStringifiableValue); +---- + +.From a hash value +[source, java] +---- +// Construct a hash ID from a valid Hex string +var myID_1 = new HashID(myMD5String); + +// Construct a hash ID from a valid `byte[16]` array. 
+var myID_2 = new HashID(myMD5ByteArray); +---- + + +=== Validation + +The `HashID` type validates its wrapped value on construction to ensure that it +cannot contain an invalid value. This means that you can rely on the fact that +if your code was called, passing in a `HashID` instance, the wrapped value is, +in fact, a valid 128-bit digest. + +.Invalid Instantiation +[source, java] +---- +try { + var myID_1 = new HashID("Hello world!"); +} catch (IllegalArgumentException e) { + System.out.println("Oops, can't construct a HashID from an invalid string."); +} + +try { + var myID_2 = new HashID(new byte[19]); +} catch (IllegalArgumentException e) { + System.out.println("Oops, can't construct a HashID from an invalid byte array."); +} +---- + + +== Implementation + +`HashID` is effectively just a "new-type" wrapper around an immutable 16-byte +array that offers methods for accessing the raw value as either the contained +byte array or as a 32-character hex string. + +A `HashID` is safe to use in Sets or as keys in a Map. -Intended purpose is job collision detection based on MD5 hashes of job configurations. \ No newline at end of file diff --git a/src/main/kotlin/org/veupathdb/lib/hash_id/HashID.kt b/src/main/kotlin/org/veupathdb/lib/hash_id/HashID.kt index a080b65..623f000 100644 --- a/src/main/kotlin/org/veupathdb/lib/hash_id/HashID.kt +++ b/src/main/kotlin/org/veupathdb/lib/hash_id/HashID.kt @@ -1,11 +1,29 @@ package org.veupathdb.lib.hash_id -import java.io.BufferedInputStream import java.io.InputStream import java.security.MessageDigest /** - * 128-bit ID Represented as a 32 digit hash string. + * Hash/Digest Based Identifier. + * + * This class offers an implementation of a digest-based identifier that is more + * robust than a naked digest string. Once constructed from the source string, + * it generates the digest and efficiently stores the digest as a 128-bit + * number. 
+ * + * This type is effectively a "new type" over `byte[16]` which provides the + * following features: + * + * * Allows consumers to make guarantees about the values they are given as + * a [HashID] can only be constructed via a valid hash either by hex string or + * by a raw array of exactly 16 bytes. + * * Eliminates the class(es) of bugs that could result from dealing with a + * builtin type, relying on ID consumers to perform validation at every + * necessary step. + * * Can be constructed via the standard constructor, or via the provided + * convenience methods allowing the [HashID] to be created from an arbitrary + * [String], [InputStream], or [Object] which will be MD5 hashed to generate + * a new `HashID` instance. * * This value is safe to use in [Sets][Set] and [Maps][Map]. * @@ -20,6 +38,9 @@ class HashID { * The URL-safe, stringified form of this [HashID]. * * This value will be a 32 digit hex string. + * + * **NOTE**: This value is not cached, and is calculated on every call to this + * property/getter. */ val string get() = renderBytes(rawBytes) @@ -102,7 +123,10 @@ class HashID { companion object { /** - * Creates a new [HashID] instance wrapping the MD5 hash of the given value. + * Calculates the MD5 hash of the given value and wraps that hash in a new + * [HashID] instance. + * + * @param value Value that will be MD5 hashed. * * @return The new [HashID]. */ @@ -114,16 +138,24 @@ class HashID { } /** - * Creates a new [HashID] instance wrapping the MD5 hash of the given value. + * Calculates the MD5 hash value of the contents of the given [InputStream] + * and wraps that hash in a new [HashID] instance. + * + * @param stream InputStream over the value(s) that will be MD5 hashed. + * + * @param close Whether the given input stream should be closed by this + * method. + * + * Defaults to `false`. * * @return The new [HashID]. 
*/ @JvmStatic @JvmOverloads - fun ofMD5(value: InputStream, close: Boolean = false): HashID { + fun ofMD5(stream: InputStream, close: Boolean = false): HashID { val digest = MessageDigest.getInstance("MD5") - val stream = if (value is BufferedInputStream) value else BufferedInputStream(value) + val stream = stream.buffered() val buffer = ByteArray(8192) if (close) { @@ -154,7 +186,15 @@ class HashID { } /** - * Creates a new [HashID] instance wrapping the MD5 hash of the given value. + * Calculates the MD5 hash value of the stringified form of the given value + * and wraps that hash in a new [HashID] instance. + * + * This method is a simple convenience method over: + * ``` + * HashID.ofMD5(myType.toString()) + * ``` + * + * @param value Value that will be MD5 hashed. * * @return The new [HashID]. */