Skip to content

Commit

Permalink
Fix: Avoid LibC symbols (#230)
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian authored Nov 8, 2024
1 parent f333bb5 commit 660c7dd
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 17 deletions.
6 changes: 6 additions & 0 deletions c/lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
#define SIMSIMD_NATIVE_F16 0
#define SIMSIMD_NATIVE_BF16 0

/* Override the primary serial operations to avoid the LibC dependency.
*/
#define SIMSIMD_SQRT(x) simsimd_approximate_square_root(x)
#define SIMSIMD_RSQRT(x) simsimd_approximate_inverse_square_root(x)
#define SIMSIMD_LOG(x) simsimd_approximate_log(x)

/* Depending on the Operating System, the following intrinsics are available
* on recent compiler toolchains:
*
Expand Down
51 changes: 34 additions & 17 deletions include/simsimd/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#elif defined(__GNUC__) || defined(__clang__)
#define SIMSIMD_DYNAMIC __attribute__((visibility("default")))
#define SIMSIMD_PUBLIC __attribute__((unused)) inline static
#define SIMSIMD_INTERNAL __attribute__((always_inline)) inline static
#define SIMSIMD_INTERNAL inline static
#else
#define SIMSIMD_DYNAMIC
#define SIMSIMD_PUBLIC inline static
Expand Down Expand Up @@ -436,16 +436,20 @@ SIMSIMD_STATIC_ASSERT(sizeof(simsimd_bf16_t) == 2, simsimd_bf16_t_must_be_2_byte
#endif

#if !defined(SIMSIMD_F32_TO_I8)
#define SIMSIMD_F32_TO_I8(x, y) *(y) = (simsimd_i8_t)fminf(fmaxf(roundf(x), -128), 127)
#define SIMSIMD_F32_TO_I8(x, y) \
*(y) = (simsimd_i8_t)((x) > 127 ? 127 : ((x) < -128 ? -128 : (int)((x) + ((x) < 0 ? -0.5f : 0.5f))))
#endif
#if !defined(SIMSIMD_F32_TO_U8)
#define SIMSIMD_F32_TO_U8(x, y) *(y) = (simsimd_u8_t)fminf(fmaxf(roundf(x), 0), 255)
#define SIMSIMD_F32_TO_U8(x, y) \
*(y) = (simsimd_u8_t)((x) > 255 ? 255 : ((x) < 0 ? 0 : (int)((x) + ((x) < 0 ? -0.5f : 0.5f))))
#endif
#if !defined(SIMSIMD_F64_TO_I8)
#define SIMSIMD_F64_TO_I8(x, y) *(y) = (simsimd_i8_t)fmin(fmax(round(x), -128), 127)
#define SIMSIMD_F64_TO_I8(x, y) \
*(y) = (simsimd_i8_t)((x) > 127 ? 127 : ((x) < -128 ? -128 : (int)((x) + ((x) < 0 ? -0.5 : 0.5))))
#endif
#if !defined(SIMSIMD_F64_TO_U8)
#define SIMSIMD_F64_TO_U8(x, y) *(y) = (simsimd_u8_t)fmin(fmax(round(x), 0), 255)
#define SIMSIMD_F64_TO_U8(x, y) \
*(y) = (simsimd_u8_t)((x) > 255 ? 255 : ((x) < 0 ? 0 : (int)((x) + ((x) < 0 ? -0.5 : 0.5))))
#endif

/** @brief Convenience type for half-precision floating-point type conversions. */
Expand All @@ -467,20 +471,33 @@ typedef union {
* https://web.archive.org/web/20210208132927/http://assemblyrequired.crashworks.org/timing-square-root/
* https://stackoverflow.com/a/41460625/2766161
*/
SIMSIMD_PUBLIC simsimd_f32_t simsimd_approximate_inverse_square_root(simsimd_f32_t number) {
SIMSIMD_INTERNAL simsimd_f32_t simsimd_approximate_inverse_square_root(simsimd_f32_t number) {
simsimd_f32i32_t conv;
conv.f = number;
conv.i = 0x5F1FFFF9 - (conv.i >> 1);
// Refine using a Newton-Raphson step for better accuracy
conv.f *= 0.703952253f * (2.38924456f - number * conv.f * conv.f);
return conv.f;
}

/**
* @brief Approximates `sqrt(x)` using the fast inverse square root trick
* with adjustments for direct square root approximation.
*
* Similar to `rsqrt` approximation but multiplies by `number` to get `sqrt`.
* This technique is useful where `sqrt` approximation is needed in performance-critical code,
* though modern hardware provides optimized alternatives.
*/
SIMSIMD_INTERNAL simsimd_f32_t simsimd_approximate_square_root(simsimd_f32_t number) {
return number * simsimd_approximate_inverse_square_root(number);
}

/**
* @brief Computes `log(x)` using the Mercator series.
* The series converges to the natural logarithm for args between -1 and 1.
* Published in 1668 in "Logarithmotechnia".
*/
SIMSIMD_PUBLIC simsimd_f32_t simsimd_approximate_log(simsimd_f32_t number) {
SIMSIMD_INTERNAL simsimd_f32_t simsimd_approximate_log(simsimd_f32_t number) {
simsimd_f32_t x = number - 1;
simsimd_f32_t x2 = x * x;
simsimd_f32_t x3 = x * x * x;
Expand All @@ -497,7 +514,7 @@ SIMSIMD_PUBLIC simsimd_f32_t simsimd_approximate_log(simsimd_f32_t number) {
* https://gist.github.com/milhidaka/95863906fe828198f47991c813dbe233
* https://github.com/OpenCyphal/libcanard/blob/636795f4bc395f56af8d2c61d3757b5e762bb9e5/canard.c#L811-L834
*/
SIMSIMD_PUBLIC simsimd_f32_t simsimd_f16_to_f32(simsimd_f16_t const *x_ptr) {
SIMSIMD_INTERNAL simsimd_f32_t simsimd_f16_to_f32(simsimd_f16_t const *x_ptr) {
unsigned short x = *(unsigned short const *)x_ptr;
unsigned int exponent = (x & 0x7C00) >> 10;
unsigned int mantissa = (x & 0x03FF) << 13;
Expand All @@ -519,7 +536,7 @@ SIMSIMD_PUBLIC simsimd_f32_t simsimd_f16_to_f32(simsimd_f16_t const *x_ptr) {
* https://gist.github.com/milhidaka/95863906fe828198f47991c813dbe233
* https://github.com/OpenCyphal/libcanard/blob/636795f4bc395f56af8d2c61d3757b5e762bb9e5/canard.c#L811-L834
*/
SIMSIMD_PUBLIC void simsimd_f32_to_f16(simsimd_f32_t x, simsimd_f16_t *result_ptr) {
SIMSIMD_INTERNAL void simsimd_f32_to_f16(simsimd_f32_t x, simsimd_f16_t *result_ptr) {
simsimd_f32i32_t conv;
conv.f = x;
unsigned int b = conv.i + 0x00001000;
Expand All @@ -538,7 +555,7 @@ SIMSIMD_PUBLIC void simsimd_f32_to_f16(simsimd_f32_t x, simsimd_f16_t *result_pt
* https://stackoverflow.com/questions/55253233/convert-fp32-to-bfloat16-in-c/55254307#55254307
* https://cloud.google.com/blog/products/ai-machine-learning/bfloat16-the-secret-to-high-performance-on-cloud-tpus
*/
SIMSIMD_PUBLIC simsimd_f32_t simsimd_bf16_to_f32(simsimd_bf16_t const *x_ptr) {
SIMSIMD_INTERNAL simsimd_f32_t simsimd_bf16_to_f32(simsimd_bf16_t const *x_ptr) {
unsigned short x = *(unsigned short const *)x_ptr;
simsimd_f32i32_t conv;
conv.i = x << 16; // Zero extends the mantissa
Expand All @@ -551,7 +568,7 @@ SIMSIMD_PUBLIC simsimd_f32_t simsimd_bf16_to_f32(simsimd_bf16_t const *x_ptr) {
* https://stackoverflow.com/questions/55253233/convert-fp32-to-bfloat16-in-c/55254307#55254307
* https://cloud.google.com/blog/products/ai-machine-learning/bfloat16-the-secret-to-high-performance-on-cloud-tpus
*/
SIMSIMD_PUBLIC void simsimd_f32_to_bf16(simsimd_f32_t x, simsimd_bf16_t *result_ptr) {
SIMSIMD_INTERNAL void simsimd_f32_to_bf16(simsimd_f32_t x, simsimd_bf16_t *result_ptr) {
simsimd_f32i32_t conv;
conv.f = x;
conv.i += 0x8000; // Rounding is optional
Expand All @@ -561,12 +578,12 @@ SIMSIMD_PUBLIC void simsimd_f32_to_bf16(simsimd_f32_t x, simsimd_bf16_t *result_
*(unsigned short *)result_ptr = (unsigned short)conv.i;
}

SIMSIMD_PUBLIC simsimd_u32_t simsimd_u32_rol(simsimd_u32_t x, int n) { return (x << n) | (x >> (32 - n)); }
SIMSIMD_PUBLIC simsimd_u16_t simsimd_u16_rol(simsimd_u16_t x, int n) { return (x << n) | (x >> (16 - n)); }
SIMSIMD_PUBLIC simsimd_u8_t simsimd_u8_rol(simsimd_u8_t x, int n) { return (x << n) | (x >> (8 - n)); }
SIMSIMD_PUBLIC simsimd_u32_t simsimd_u32_ror(simsimd_u32_t x, int n) { return (x >> n) | (x << (32 - n)); }
SIMSIMD_PUBLIC simsimd_u16_t simsimd_u16_ror(simsimd_u16_t x, int n) { return (x >> n) | (x << (16 - n)); }
SIMSIMD_PUBLIC simsimd_u8_t simsimd_u8_ror(simsimd_u8_t x, int n) { return (x >> n) | (x << (8 - n)); }
SIMSIMD_INTERNAL simsimd_u32_t simsimd_u32_rol(simsimd_u32_t x, int n) { return (x << n) | (x >> (32 - n)); }
SIMSIMD_INTERNAL simsimd_u16_t simsimd_u16_rol(simsimd_u16_t x, int n) { return (x << n) | (x >> (16 - n)); }
SIMSIMD_INTERNAL simsimd_u8_t simsimd_u8_rol(simsimd_u8_t x, int n) { return (x << n) | (x >> (8 - n)); }
SIMSIMD_INTERNAL simsimd_u32_t simsimd_u32_ror(simsimd_u32_t x, int n) { return (x >> n) | (x << (32 - n)); }
SIMSIMD_INTERNAL simsimd_u16_t simsimd_u16_ror(simsimd_u16_t x, int n) { return (x >> n) | (x << (16 - n)); }
SIMSIMD_INTERNAL simsimd_u8_t simsimd_u8_ror(simsimd_u8_t x, int n) { return (x >> n) | (x << (8 - n)); }

#ifdef __cplusplus
} // extern "C"
Expand Down

3 comments on commit 660c7dd

@19y
Copy link

@19y 19y commented on 660c7dd Nov 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can this modification solve the bug of "implicit declaration of function 'lgammal_r'; did you mean 'lgamal'"?

@ashvardanian
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this and related commits avoid the LibC math ops imported from <math.h>.

@19y
Copy link

@19y 19y commented on 660c7dd Nov 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

由于 this 提交和相关提交避免了从 .<math.h>

Thank you for your answer

Please sign in to comment.