From ce581847befabbd41af81ca63a7e71001c03c894 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 27 Oct 2023 06:07:18 +0000 Subject: [PATCH 1/6] Build: Released 3.3.0 [skip ci] # [3.3.0](https://github.com/ashvardanian/simsimd/compare/v3.2.0...v3.3.0) (2023-10-27) ### Add * VNNI capability ([2dd106f](https://github.com/ashvardanian/simsimd/commit/2dd106fec77b8463d42511507d0c96931690769d)) ### Fix * AVX2 `int8` angular distance ([143aa34](https://github.com/ashvardanian/simsimd/commit/143aa349a8ec0e3fb374acdc712e63d058b3937a)) * Use `rtol` for L2sq and `atol` for other ([89a61b3](https://github.com/ashvardanian/simsimd/commit/89a61b3727592891a2b0b72464427beecb0610eb)) ### Improve * `goto` to avoid more conditions ([ef71253](https://github.com/ashvardanian/simsimd/commit/ef71253fc78a4364efd7fa04c8f8bf8280ba46f6)) * Run benchmarks on 1 thread ([2e1a714](https://github.com/ashvardanian/simsimd/commit/2e1a714e709f3fd09d3ffc828597cd9a2bd6d518)) * Use BMI2 and AVX-512VNNI for masks & fma ([161eee9](https://github.com/ashvardanian/simsimd/commit/161eee99c21663b1af3c858bbb12a47cf831b781)) ### Make * Disable `-ffast-math` ([afcb7f8](https://github.com/ashvardanian/simsimd/commit/afcb7f8a5e32bc5c7ae34abd3bc99178e55cc034)) * Separate CI ([62c4901](https://github.com/ashvardanian/simsimd/commit/62c49015a032623d1cb1993c16fad80b234f0a81)) * Use recent compilers ([ac01aa2](https://github.com/ashvardanian/simsimd/commit/ac01aa28e2fe06d8751d895c7a5cdfa7feaeebc8)) ### Test * distances for `int8` arrays ([c1c06ba](https://github.com/ashvardanian/simsimd/commit/c1c06bac5382f37e51dadbb0ecef9994767473f6)) * Normalize bitwise distances ([3edfe0b](https://github.com/ashvardanian/simsimd/commit/3edfe0b667e18169e349cdb04321415e34ef6b7e)) --- VERSION | 2 +- package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index 944880fa..15a27998 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.0 +3.3.0 diff --git a/package.json b/package.json index ac6997d8..cc7a8026 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "simsimd", - "version": "3.2.0", + "version": "3.3.0", "description": "Vector Similarity Functions 3x-200x Faster than SciPy and NumPy", "author": "Ash Vardanian", "license": "Apache 2.0", From f2555af5b2b4aa0209c0f3e8ef15d3f800c5bbb3 Mon Sep 17 00:00:00 2001 From: Nairi Harutyunyan Date: Mon, 30 Oct 2023 23:17:51 +0200 Subject: [PATCH 2/6] Add: .npmignore & some minor fixes (#37) * Add npmignore, minor changes related to the tests * Update release.yml * Remove log --- .github/workflows/release.yml | 4 ++-- .npmignore | 8 ++++++++ README.md | 2 +- javascript/{test.js => test/simsimd.test.js} | 0 package.json | 8 ++++---- 5 files changed, 15 insertions(+), 7 deletions(-) create mode 100644 .npmignore rename javascript/{test.js => test/simsimd.test.js} (100%) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index dc44b887..34d63cad 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -131,8 +131,8 @@ jobs: - name: Build and Test run: | - npm install - npm ci + npm install && + npm ci && npm test - name: Publish diff --git a/.npmignore b/.npmignore new file mode 100644 index 00000000..82e47538 --- /dev/null +++ b/.npmignore @@ -0,0 +1,8 @@ +python +golang +go.mod +.github +.vscode +pyproject.toml +.clang-format +VERSION \ No newline at end of file diff --git a/README.md b/README.md index 73601ca7..de086604 100644 --- a/README.md +++ b/README.md @@ -189,7 +189,7 @@ __To test and benchmark JavaScript bindings__: ```sh npm install --dev npm test -npm bench +npm run bench ``` __To test and benchmark GoLang bindings__: diff --git a/javascript/test.js b/javascript/test/simsimd.test.js similarity index 100% rename from javascript/test.js rename to javascript/test/simsimd.test.js diff --git a/package.json b/package.json index cc7a8026..3417f6c3 100644 --- a/package.json +++ b/package.json @@ -6,6 +6,10 @@ "license": "Apache 2.0", "main": "javascript/simsimd.js", "type": "module", + "scripts": { + "test": "node --test ./javascript/test", + "bench": "node ./javascript/bench.js" + }, "repository": { "type": "git", "url": "https://github.com/ashvardanian/simsimd.git" @@ -19,10 +23,6 @@ "bindings": "~1.2.1", "node-addon-api": "^3.0.0" }, - "scripts": { - "test": "node --test ./javascript/test.js", - "bench": "node ./javascript/bench.js" - }, "devDependencies": { "benchmark": "^2.1.4", "mathjs": "^11.11.2", From 5df52200eea51e6f2e87a7ce9f3f974280fbbb77 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:36:24 -0700 Subject: [PATCH 3/6] Docs: Download stats --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index de086604..4de7556c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ # SimSIMD 📏 +
+ PyPI - Downloads + npm +GitHub code size in bytes +
+ ## Efficient Alternative to [`scipy.spatial.distance`][scipy] and [`numpy.inner`][numpy] SimSIMD leverages SIMD intrinsics, capabilities that only select compilers effectively utilize. This framework supports conventional AVX2 instructions on x86, NEON on Arm, as well as __rare__ AVX-512 FP16 instructions on x86 and Scalable Vector Extensions (SVE) on Arm. Designed specifically for Machine Learning contexts, it's optimized for handling high-dimensional vector embeddings. From 7f9c6df055824086056de541a57e9c43a87191c9 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 30 Oct 2023 15:36:46 -0700 Subject: [PATCH 4/6] Fix: normalize vectors for JS tests --- javascript/test/simsimd.test.js | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/javascript/test/simsimd.test.js b/javascript/test/simsimd.test.js index fead7207..167a7543 100644 --- a/javascript/test/simsimd.test.js +++ b/javascript/test/simsimd.test.js @@ -4,12 +4,6 @@ import assert from 'node:assert'; const simsimd = bindings('simsimd'); -const f32Array1 = new Float32Array([1.0, 2.0, 3.0]); -const f32Array2 = new Float32Array([4.0, 5.0, 6.0]); - -const u8Array1 = new Uint8Array([1, 2, 3]); -const u8Array2 = new Uint8Array([4, 5, 6]); - function assertAlmostEqual(actual, expected, tolerance = 1e-6) { const lowerBound = expected - tolerance; const upperBound = expected + tolerance; @@ -17,20 +11,26 @@ function assertAlmostEqual(actual, expected, tolerance = 1e-6) { } test('Distance from itself', () => { - assertAlmostEqual(simsimd.sqeuclidean(f32Array1, f32Array1), 0.0, 0.01); - assertAlmostEqual(simsimd.cosine(f32Array1, f32Array1), 0.0, 0.01); - // Inner-product distance on non-nroamalized vectors would yield: - // 1 - 1 - 4 - 9 = -13 - assertAlmostEqual(simsimd.inner(f32Array1, f32Array1), -13.0, 0.01); + const f32s = new Float32Array([1.0, 2.0, 3.0]); + assertAlmostEqual(simsimd.sqeuclidean(f32s, f32s), 0.0, 0.01); + assertAlmostEqual(simsimd.cosine(f32s, f32s), 0.0, 0.01); - assertAlmostEqual(simsimd.kullbackleibler(f32Array1, f32Array1), 0.0, 0.01); - assertAlmostEqual(simsimd.jensenshannon(f32Array1, f32Array1), 0.0, 0.01); + const f32sNormalized = new Float32Array([1.0 / Math.sqrt(14), 2.0 / Math.sqrt(14), 3.0 / Math.sqrt(14)]); + assertAlmostEqual(simsimd.inner(f32sNormalized, f32sNormalized), 0.0, 0.01); - assertAlmostEqual(simsimd.hamming(u8Array1, u8Array1), 0.0, 0.01); - assertAlmostEqual(simsimd.jaccard(u8Array1, u8Array1), 0.0, 0.01); + const f32sDistribution = new Float32Array([1.0 / 6, 2.0 / 6, 3.0 / 6]); + assertAlmostEqual(simsimd.kullbackleibler(f32sDistribution, f32sDistribution), 0.0, 0.01); + assertAlmostEqual(simsimd.jensenshannon(f32sDistribution, f32sDistribution), 0.0, 0.01); + + const u8s = new Uint8Array([1, 2, 3]); + assertAlmostEqual(simsimd.hamming(u8s, u8s), 0.0, 0.01); + assertAlmostEqual(simsimd.jaccard(u8s, u8s), 0.0, 0.01); }); +const f32Array1 = new Float32Array([1.0, 2.0, 3.0]); +const f32Array2 = new Float32Array([4.0, 5.0, 6.0]); + test('Squared Euclidean Distance', () => { const result = simsimd.sqeuclidean(f32Array1, f32Array2); assertAlmostEqual(result, 27.0, 0.01); From dd655c1406cb676e046268c66f561abb35a8ad36 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 30 Oct 2023 16:00:43 -0700 Subject: [PATCH 5/6] Test: Compare our `f16` to SciPy `f64` --- python/test.py | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/python/test.py b/python/test.py index 4e59b834..95199969 100644 --- a/python/test.py +++ b/python/test.py @@ -6,7 +6,7 @@ # For normalized distances we use the absolute tolerance, because the result is close to zero. # For unnormalized ones (like squared Euclidean or Jaccard), we use the relative. SIMSIMD_RTOL = 0.2 -SIMSIMD_ATOL = 0.1 +SIMSIMD_ATOL = 0.15 def test_pointers_availability(): @@ -29,13 +29,13 @@ def test_pointers_availability(): @pytest.mark.parametrize("dtype", [np.float32, np.float16]) def test_dot(ndim, dtype): """Compares the simd.dot() function with numpy.dot(), measuring the accuracy error for f16, and f32 types.""" - a = np.random.randn(ndim).astype(dtype) - b = np.random.randn(ndim).astype(dtype) + a = np.random.randn(ndim) + b = np.random.randn(ndim) a /= np.linalg.norm(a) b /= np.linalg.norm(b) expected = 1 - np.inner(a, b) - result = simd.inner(a, b) + result = simd.inner(a.astype(dtype), b.astype(dtype)) np.testing.assert_allclose(expected, result, atol=SIMSIMD_ATOL, rtol=0) @@ -45,11 +45,11 @@ def test_dot(ndim, dtype): @pytest.mark.parametrize("dtype", [np.float32, np.float16]) def test_sqeuclidean(ndim, dtype): """Compares the simd.sqeuclidean() function with scipy.spatial.distance.sqeuclidean(), measuring the accuracy error for f16, and f32 types.""" - a = np.random.randn(ndim).astype(dtype) - b = np.random.randn(ndim).astype(dtype) + a = np.random.randn(ndim) + b = np.random.randn(ndim) expected = spd.sqeuclidean(a, b) - result = simd.sqeuclidean(a, b) + result = simd.sqeuclidean(a.astype(dtype), b.astype(dtype)) np.testing.assert_allclose(expected, result, atol=0, rtol=SIMSIMD_RTOL) @@ -59,14 +59,33 @@ def test_sqeuclidean(ndim, dtype): @pytest.mark.parametrize("dtype", [np.float32, np.float16]) def test_cosine(ndim, dtype): """Compares the simd.cosine() function with scipy.spatial.distance.cosine(), measuring the accuracy error for f16, and f32 types.""" - a = np.random.randn(ndim).astype(dtype) - b = np.random.randn(ndim).astype(dtype) + a = np.random.randn(ndim) + b = np.random.randn(ndim) expected = spd.cosine(a, b) - result = simd.cosine(a, b) + result = simd.cosine(a.astype(dtype), b.astype(dtype)) + + np.testing.assert_allclose(expected, result, atol=SIMSIMD_ATOL, rtol=0) + + +@pytest.mark.repeat(50) +@pytest.mark.parametrize("ndim", [3, 97]) +@pytest.mark.parametrize("dtype", [np.float32, np.float16]) +def test_jensen_shannon(ndim, dtype): + """Compares the simd.jensenshannon() function with scipy.spatial.distance.jensenshannon(), measuring the accuracy error for f16, and f32 types.""" + a = np.random.rand(ndim) + b = np.random.rand(ndim) + + # Normalize to make them probability distributions + a /= np.sum(a) + b /= np.sum(b) + + expected = spd.jensenshannon(a, b) + result = simd.jensenshannon(a.astype(dtype), b.astype(dtype)) np.testing.assert_allclose(expected, result, atol=SIMSIMD_ATOL, rtol=0) + @pytest.mark.repeat(50) @pytest.mark.parametrize("ndim", [3, 97, 1536]) def test_cosine_i8(ndim): @@ -79,6 +98,7 @@ def test_cosine_i8(ndim): np.testing.assert_allclose(expected, result, atol=SIMSIMD_ATOL, rtol=0) + @pytest.mark.repeat(50) @pytest.mark.parametrize("ndim", [3, 97, 1536]) def test_sqeuclidean_i8(ndim): @@ -91,6 +111,7 @@ def test_sqeuclidean_i8(ndim): np.testing.assert_allclose(expected, result, atol=0, rtol=SIMSIMD_RTOL) + @pytest.mark.parametrize("ndim", [3, 97, 1536]) @pytest.mark.parametrize("dtype", [np.float32, np.float16]) def test_cosine_zero_vector(ndim, dtype): @@ -128,7 +149,7 @@ def test_jaccard(ndim): a = np.random.randint(2, size=ndim).astype(np.uint8) b = np.random.randint(2, size=ndim).astype(np.uint8) - expected = spd.jaccard(a, b) + expected = spd.jaccard(a, b) result = simd.jaccard(np.packbits(a), np.packbits(b)) np.testing.assert_allclose(expected, result, atol=SIMSIMD_ATOL, rtol=0) From a70479f2182f94615043a337e70543a230248bb5 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 30 Oct 2023 16:02:45 -0700 Subject: [PATCH 6/6] Improve: Same epsilon for JS/KL backends --- .vscode/settings.json | 1 + include/simsimd/probability.h | 44 +++++++++++++++++------------------ python/lib.c | 4 ++-- python/test.py | 4 ++-- 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 4357e943..46906813 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -107,6 +107,7 @@ "sqeuclidean", "tanimoto", "typedarray", + "unnormalized", "unsw", "Vardanian", "vpopcntdq" diff --git a/include/simsimd/probability.h b/include/simsimd/probability.h index 4ef77f7f..8ded7ee4 100644 --- a/include/simsimd/probability.h +++ b/include/simsimd/probability.h @@ -52,17 +52,17 @@ extern "C" { #endif -SIMSIMD_MAKE_KL(serial, f32, f32, SIMSIMD_IDENTIFY, 1e-6) // simsimd_serial_f32_kl -SIMSIMD_MAKE_JS(serial, f32, f32, SIMSIMD_IDENTIFY, 1e-6) // simsimd_serial_f32_js +SIMSIMD_MAKE_KL(serial, f32, f32, SIMSIMD_IDENTIFY, 1e-7) // simsimd_serial_f32_kl +SIMSIMD_MAKE_JS(serial, f32, f32, SIMSIMD_IDENTIFY, 1e-7) // simsimd_serial_f32_js -SIMSIMD_MAKE_KL(serial, f16, f32, SIMSIMD_UNCOMPRESS_F16, 1e-3) // simsimd_serial_f16_kl -SIMSIMD_MAKE_JS(serial, f16, f32, SIMSIMD_UNCOMPRESS_F16, 1e-3) // simsimd_serial_f16_js +SIMSIMD_MAKE_KL(serial, f16, f32, SIMSIMD_UNCOMPRESS_F16, 1e-7) // simsimd_serial_f16_kl +SIMSIMD_MAKE_JS(serial, f16, f32, SIMSIMD_UNCOMPRESS_F16, 1e-7) // simsimd_serial_f16_js -SIMSIMD_MAKE_KL(accurate, f32, f64, SIMSIMD_IDENTIFY, 1e-6) // simsimd_accurate_f32_kl -SIMSIMD_MAKE_JS(accurate, f32, f64, SIMSIMD_IDENTIFY, 1e-6) // simsimd_accurate_f32_js +SIMSIMD_MAKE_KL(accurate, f32, f64, SIMSIMD_IDENTIFY, 1e-7) // simsimd_accurate_f32_kl +SIMSIMD_MAKE_JS(accurate, f32, f64, SIMSIMD_IDENTIFY, 1e-7) // simsimd_accurate_f32_js -SIMSIMD_MAKE_KL(accurate, f16, f64, SIMSIMD_UNCOMPRESS_F16, 1e-6) // simsimd_accurate_f16_kl -SIMSIMD_MAKE_JS(accurate, f16, f64, SIMSIMD_UNCOMPRESS_F16, 1e-6) // simsimd_accurate_f16_js +SIMSIMD_MAKE_KL(accurate, f16, f64, SIMSIMD_UNCOMPRESS_F16, 1e-7) // simsimd_accurate_f16_kl +SIMSIMD_MAKE_JS(accurate, f16, f64, SIMSIMD_UNCOMPRESS_F16, 1e-7) // simsimd_accurate_f16_js #if SIMSIMD_TARGET_ARM #if SIMSIMD_TARGET_ARM_NEON @@ -108,7 +108,7 @@ __attribute__((target("+simd"))) // inline static simsimd_f32_t simsimd_neon_f32_kl(simsimd_f32_t const* a, simsimd_f32_t const* b, simsimd_size_t n) { float32x4_t sum_vec = vdupq_n_f32(0); - simsimd_f32_t epsilon = 1e-6; + simsimd_f32_t epsilon = 1e-7; float32x4_t epsilon_vec = vdupq_n_f32(epsilon); simsimd_size_t i = 0; for (; i + 4 <= n; i += 4) { @@ -129,7 +129,7 @@ simsimd_neon_f32_kl(simsimd_f32_t const* a, simsimd_f32_t const* b, simsimd_size __attribute__((target("+simd"))) inline static simsimd_f32_t simsimd_neon_f32_js(simsimd_f32_t const* a, simsimd_f32_t const* b, simsimd_size_t n) { float32x4_t sum_vec = vdupq_n_f32(0); - simsimd_f32_t epsilon = 1e-6; + simsimd_f32_t epsilon = 1e-7; float32x4_t epsilon_vec = vdupq_n_f32(epsilon); simsimd_size_t i = 0; for (; i + 4 <= n; i += 4) { @@ -147,11 +147,11 @@ simsimd_neon_f32_js(simsimd_f32_t const* a, simsimd_f32_t const* b, simsimd_size simsimd_f32_t log2_normalizer = 0.693147181f; simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer; for (; i < n; ++i) { - simsimd_f32_t mi = a[i] + b[i]; + simsimd_f32_t mi = 0.5f * (a[i] + b[i]); sum += a[i] * SIMSIMD_LOG((a[i] + epsilon) / (mi + epsilon)); sum += b[i] * SIMSIMD_LOG((b[i] + epsilon) / (mi + epsilon)); } - return sum * 0.5f; + return sum; } /* @@ -168,7 +168,7 @@ __attribute__((target("+simd+fp16"))) // inline static simsimd_f32_t simsimd_neon_f16_kl(simsimd_f16_t const* a, simsimd_f16_t const* b, simsimd_size_t n) { float32x4_t sum_vec = vdupq_n_f32(0); - simsimd_f32_t epsilon = 1e-3; + simsimd_f32_t epsilon = 1e-7; float32x4_t epsilon_vec = vdupq_n_f32(epsilon); simsimd_size_t i = 0; for (; i + 4 <= n; i += 4) { @@ -191,7 +191,7 @@ __attribute__((target("+simd+fp16"))) // inline static simsimd_f32_t simsimd_neon_f16_js(simsimd_f16_t const* a, simsimd_f16_t const* b, simsimd_size_t n) { float32x4_t sum_vec = vdupq_n_f32(0); - simsimd_f32_t epsilon = 1e-3; + simsimd_f32_t epsilon = 1e-7; float32x4_t epsilon_vec = vdupq_n_f32(epsilon); simsimd_size_t i = 0; for (; i + 4 <= n; i += 4) { @@ -211,11 +211,11 @@ simsimd_neon_f16_js(simsimd_f16_t const* a, simsimd_f16_t const* b, simsimd_size for (; i < n; ++i) { simsimd_f32_t ai = SIMSIMD_UNCOMPRESS_F16(a[i]); simsimd_f32_t bi = SIMSIMD_UNCOMPRESS_F16(b[i]); - simsimd_f32_t mi = ai + bi; + simsimd_f32_t mi = 0.5f * (ai + bi); sum += ai * SIMSIMD_LOG((ai + epsilon) / (mi + epsilon)); sum += bi * SIMSIMD_LOG((bi + epsilon) / (mi + epsilon)); } - return sum / 2; + return sum; } #endif // SIMSIMD_TARGET_ARM_NEON @@ -268,7 +268,7 @@ __attribute__((target("avx2,f16c,fma"))) // inline static simsimd_f32_t simsimd_avx2_f16_kl(simsimd_f16_t const* a, simsimd_f16_t const* b, simsimd_size_t n) { __m256 sum_vec = _mm256_set1_ps(0); - simsimd_f32_t epsilon = 1e-5; + simsimd_f32_t epsilon = 1e-7; __m256 epsilon_vec = _mm256_set1_ps(epsilon); simsimd_size_t i = 0; for (; i + 8 <= n; i += 8) { @@ -300,7 +300,7 @@ __attribute__((target("avx2,f16c,fma"))) // inline static simsimd_f32_t simsimd_avx2_f16_js(simsimd_f16_t const* a, simsimd_f16_t const* b, simsimd_size_t n) { __m256 sum_vec = _mm256_set1_ps(0); - simsimd_f32_t epsilon = 1e-5; + simsimd_f32_t epsilon = 1e-7; __m256 epsilon_vec = _mm256_set1_ps(epsilon); simsimd_size_t i = 0; for (; i + 8 <= n; i += 8) { @@ -374,7 +374,7 @@ __attribute__((target("avx512f,avx512vl,bmi2"))) // inline static simsimd_f32_t simsimd_avx512_f32_kl(simsimd_f32_t const* a, simsimd_f32_t const* b, simsimd_size_t n) { __m512 sum_vec = _mm512_set1_ps(0); - simsimd_f32_t epsilon = 1e-6; + simsimd_f32_t epsilon = 1e-7; __m512 epsilon_vec = _mm512_set1_ps(epsilon); __m512 a_vec, b_vec; @@ -405,7 +405,7 @@ inline static simsimd_f32_t simsimd_avx512_f32_js(simsimd_f32_t const* a, simsimd_f32_t const* b, simsimd_size_t n) { __m512 sum_a_vec = _mm512_set1_ps(0); __m512 sum_b_vec = _mm512_set1_ps(0); - simsimd_f32_t epsilon = 1e-6; + simsimd_f32_t epsilon = 1e-7; __m512 epsilon_vec = _mm512_set1_ps(epsilon); __m512 a_vec, b_vec; @@ -472,7 +472,7 @@ __attribute__((target("avx512f,avx512vl,avx512fp16,bmi2"))) // inline static simsimd_f32_t simsimd_avx512_f16_kl(simsimd_f16_t const* a, simsimd_f16_t const* b, simsimd_size_t n) { __m512h sum_vec = _mm512_set1_ph((_Float16)0); - __m512h epsilon_vec = _mm512_set1_ph((_Float16)1e-6f); + __m512h epsilon_vec = _mm512_set1_ph((_Float16)1e-7); __m512h a_vec, b_vec; simsimd_avx512_f16_kl_cycle: @@ -502,7 +502,7 @@ inline static simsimd_f32_t simsimd_avx512_f16_js(simsimd_f16_t const* a, simsimd_f16_t const* b, simsimd_size_t n) { __m512h sum_a_vec = _mm512_set1_ph((_Float16)0); __m512h sum_b_vec = _mm512_set1_ph((_Float16)0); - __m512h epsilon_vec = _mm512_set1_ph((_Float16)1e-6f); + __m512h epsilon_vec = _mm512_set1_ph((_Float16)1e-7); __m512h a_vec, b_vec; simsimd_avx512_f16_js_cycle: diff --git a/python/lib.c b/python/lib.c index 70f99cb4..a408b3d0 100644 --- a/python/lib.c +++ b/python/lib.c @@ -23,8 +23,8 @@ #define SIMSIMD_TARGET_X86_AVX512 0 #endif -#define SIMSIMD_RSQRT simsimd_approximate_inverse_square_root -#define SIMSIMD_LOG simsimd_approximate_log +#define SIMSIMD_RSQRT(x) simsimd_approximate_inverse_square_root(x) +#define SIMSIMD_LOG(x) simsimd_approximate_log(x) #include "simsimd/simsimd.h" #define PY_SSIZE_T_CLEAN diff --git a/python/test.py b/python/test.py index 95199969..2602bd1a 100644 --- a/python/test.py +++ b/python/test.py @@ -6,7 +6,7 @@ # For normalized distances we use the absolute tolerance, because the result is close to zero. # For unnormalized ones (like squared Euclidean or Jaccard), we use the relative. SIMSIMD_RTOL = 0.2 -SIMSIMD_ATOL = 0.15 +SIMSIMD_ATOL = 0.2 def test_pointers_availability(): @@ -69,7 +69,7 @@ def test_cosine(ndim, dtype): @pytest.mark.repeat(50) -@pytest.mark.parametrize("ndim", [3, 97]) +@pytest.mark.parametrize("ndim", [97, 1536]) @pytest.mark.parametrize("dtype", [np.float32, np.float16]) def test_jensen_shannon(ndim, dtype): """Compares the simd.jensenshannon() function with scipy.spatial.distance.jensenshannon(), measuring the accuracy error for f16, and f32 types."""