Skip to content

Commit

Permalink
Refactor atol function and improve documentation
Browse files Browse the repository at this point in the history
- Add handle_base_prefix and trim_and_handle_sign helper functions
- Rename atol_error to str_to_base_error for clarity
- Update atol docstring for improved clarity

Breaks up the functionality of atol for better readability
and reusability, as suggested in PR modular#3178.
  • Loading branch information
jjvraw committed Jul 5, 2024
1 parent 39d95f0 commit 4085578
Showing 1 changed file with 88 additions and 38 deletions.
126 changes: 88 additions & 38 deletions stdlib/src/builtin/string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
if (base != 0) and (base < 2 or base > 36):
raise Error("Base must be >= 2 and <= 36, or 0.")
if not str_ref:
raise Error(_atol_error(base, str_ref))
raise Error(_str_to_base_error(base, str_ref))

var real_base: Int
var ord_num_max: Int
Expand All @@ -229,35 +229,12 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
var is_negative: Bool = False
var start: Int = 0
var str_len = len(str_ref)
var buff = str_ref.unsafe_ptr()

for pos in range(start, str_len):
if _isspace(buff[pos]):
continue
start, is_negative = _trim_and_handle_sign(str_ref, str_len)

if str_ref[pos] == "-":
is_negative = True
start = pos + 1
elif str_ref[pos] == "+":
start = pos + 1
else:
start = pos
break

if str_ref[start] == "0" and start + 1 < str_len:
if base == 2 and (
str_ref[start + 1] == "b" or str_ref[start + 1] == "B"
):
start += 2
elif base == 8 and (
str_ref[start + 1] == "o" or str_ref[start + 1] == "O"
):
start += 2
elif base == 16 and (
str_ref[start + 1] == "x" or str_ref[start + 1] == "X"
):
start += 2
start = _handle_base_prefix(start, str_ref, str_len, base)

var buff = str_ref.unsafe_ptr()
alias ord_0 = ord("0")
# FIXME:
# Change this to `alias` after fixing support for __getitem__ of alias.
Expand All @@ -269,7 +246,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
real_base = real_base_new_start[0]
start = real_base_new_start[1]
if real_base == -1:
raise Error(_atol_error(base, str_ref))
raise Error(_str_to_base_error(base, str_ref))
else:
real_base = base

Expand All @@ -292,7 +269,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
var ord_current = int(buff[pos])
if ord_current == ord_underscore:
if was_last_digit_undescore:
raise Error(_atol_error(base, str_ref))
raise Error(_str_to_base_error(base, str_ref))
else:
was_last_digit_undescore = True
continue
Expand All @@ -312,29 +289,91 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
start = pos + 1
break
else:
raise Error(_atol_error(base, str_ref))
raise Error(_str_to_base_error(base, str_ref))
if pos + 1 < str_len and not _isspace(buff[pos + 1]):
var nextresult = result * real_base
if nextresult < result:
raise Error(
_atol_error(base, str_ref)
_str_to_base_error(base, str_ref)
+ " String expresses an integer too large to store in Int."
)
result = nextresult

if was_last_digit_undescore or (not found_valid_chars_after_start):
raise Error(_atol_error(base, str_ref))
raise Error(_str_to_base_error(base, str_ref))

if has_space_after_number:
for pos in range(start, str_len):
if not _isspace(buff[pos]):
raise Error(_atol_error(base, str_ref))
raise Error(_str_to_base_error(base, str_ref))
if is_negative:
result = -result
return result


fn _atol_error(base: Int, str_ref: StringRef) -> String:
@always_inline
fn _trim_and_handle_sign(str_ref: StringRef, str_len: Int) -> (Int, Bool):
    """Skips leading whitespace and consumes an optional leading sign.

    Args:
        str_ref: A StringRef containing the number to parse.
        str_len: The length of the string.

    Returns:
        A tuple containing:
        - The index at which the number starts, i.e. just past any leading
          whitespace and a single `+` or `-`. This is 0 when the string holds
          no non-whitespace character.
        - A boolean that is True only when the sign character was `-`.
    """
    var data = str_ref.unsafe_ptr()

    # Advance to the first character that is not whitespace.
    var first: Int = 0
    while first < str_len and _isspace(data[first]):
        first += 1

    var begin: Int = 0
    var negative: Bool = False
    # Only that first non-whitespace character is inspected: at most one sign
    # is consumed, and anything else marks the start of the number itself.
    if first < str_len:
        if str_ref[first] == "-":
            negative = True
            begin = first + 1
        elif str_ref[first] == "+":
            begin = first + 1
        else:
            begin = first

    return begin, negative


@always_inline
fn _handle_base_prefix(
    pos: Int, str_ref: StringRef, str_len: Int, base: Int
) -> Int:
    """Adjusts the starting position if a valid base prefix is present.

    Handles "0b"/"0B" for base 2, "0o"/"0O" for base 8, and "0x"/"0X" for
    base 16. Only adjusts if the base matches the prefix.

    Args:
        pos: Current position in the string.
        str_ref: The input string.
        str_len: Length of the input string.
        base: The specified base.

    Returns:
        Updated position after the prefix, if applicable; otherwise `pos`
        unchanged.
    """
    var start = pos
    # A prefix is two characters long, so test the bound *before* indexing.
    # This keeps `str_ref[start]` from being read past the end when
    # `pos == str_len`, e.g. when the input consisted only of a sign ("-").
    if start + 1 < str_len and str_ref[start] == "0":
        var marker = str_ref[start + 1]
        if base == 2 and (marker == "b" or marker == "B"):
            start += 2
        elif base == 8 and (marker == "o" or marker == "O"):
            start += 2
        elif base == 16 and (marker == "x" or marker == "X"):
            start += 2
    return start


fn _str_to_base_error(base: Int, str_ref: StringRef) -> String:
return (
"String is not convertible to integer with base "
+ str(base)
Expand Down Expand Up @@ -381,19 +420,30 @@ fn _identify_base(str_ref: StringRef, start: Int) -> Tuple[Int, Int]:
fn atol(str: String, base: Int = 10) raises -> Int:
    """Parses and returns the given string as an integer in the given base.

    For example, `atol("32")` returns `32`, and `atol("FF", 16)` returns `255`.
    If base is set to 0, the string is parsed as an Integer literal, with the
    following considerations:
    - '0b' or '0B' prefix indicates binary (base 2)
    - '0o' or '0O' prefix indicates octal (base 8)
    - '0x' or '0X' prefix indicates hexadecimal (base 16)
    - Without a prefix, it's treated as decimal (base 10)

    Args:
        str: A string to be parsed as an integer in the given base.
        base: Base used for conversion, value must be between 2 and 36, or 0.

    Returns:
        An integer value that represents the string, or otherwise raises.

    Raises:
        - If the given string cannot be parsed as an integer value. For
          example in `atol("Mojo")`.
        - If an incorrect base is provided (not 0 and not between 2 and 36).

    Notes:
        This follows [Python's integer literals](\
        https://docs.python.org/3/reference/lexical_analysis.html#integers).

    See Also:
        :func:`stol`: A similar function that returns both the parsed integer
        and the remaining unparsed string.
    """
    return _atol(str._strref_dangerous(), base)

Expand Down

0 comments on commit 4085578

Please sign in to comment.