Skip to content

Commit

Permalink
atol bug-fix for leading underscores
Browse files Browse the repository at this point in the history
- Support leading underscores (bug fix) for prefixed literal
- Added relevant tests

Signed-off-by: Joshua James Venter <[email protected]>
  • Loading branch information
jjvraw committed Jul 11, 2024
1 parent ce75e94 commit f4a054a
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 9 deletions.
7 changes: 7 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,13 @@ future and `StringSlice.__len__` now does return the Unicode codepoints length.
- `sort(ptr, len)` a pointer and length (can change to Span in future)
- `sort[type, cmp_fn](ptr, len)` above with custom compare
- The `atol` function now correctly supports leading underscores,
(e.g.`atol("0x_ff", 0)`), when the appropriate base is specified or inferred
(base 0). non-base-10 integer literals as per Python's [Integer Literals](\
<https://docs.python.org/3/reference/lexical_analysis.html#integers>).
([PR #3180](https://github.com/modularml/mojo/pull/3180)
by [@jjvraw](https://github.com/jjvraw))
### ❌ Removed
- It is no longer possible to cast (implicitly or explicitly) from `Reference`
Expand Down
12 changes: 8 additions & 4 deletions stdlib/src/builtin/string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
var ord_letter_max = (-1, -1)
var result = 0
var is_negative: Bool = False
var has_prefix: Bool = False
var start: Int = 0
var str_len = len(str_ref)
var buff = str_ref.unsafe_ptr()
Expand All @@ -250,14 +251,17 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
str_ref[start + 1] == "b" or str_ref[start + 1] == "B"
):
start += 2
has_prefix = True
elif base == 8 and (
str_ref[start + 1] == "o" or str_ref[start + 1] == "O"
):
start += 2
has_prefix = True
elif base == 16 and (
str_ref[start + 1] == "x" or str_ref[start + 1] == "X"
):
start += 2
has_prefix = True

alias ord_0 = ord("0")
# FIXME:
Expand All @@ -269,6 +273,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:
var real_base_new_start = _identify_base(str_ref, start)
real_base = real_base_new_start[0]
start = real_base_new_start[1]
has_prefix = real_base != 10
if real_base == -1:
raise Error(_atol_error(base, str_ref))
else:
Expand All @@ -285,10 +290,9 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int:

var found_valid_chars_after_start = False
var has_space_after_number = False
# single underscores are only allowed between digits
# starting "was_last_digit_undescore" to true such that
# if the first digit is an undesrcore an error is raised
var was_last_digit_undescore = True
# Prefixed integer literals with real_base 2, 8, 16 may begin with leading
# underscores under the conditions they have a prefix
var was_last_digit_undescore = not (real_base in (2, 8, 16) and has_prefix)
for pos in range(start, str_len):
var ord_current = int(buff[pos])
if ord_current == ord_underscore:
Expand Down
41 changes: 36 additions & 5 deletions stdlib/test/builtin/test_string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,9 @@ def test_atol():
assert_equal(10, atol("0o12", 8))
assert_equal(10, atol("0O12", 8))
assert_equal(35, atol("Z", 36))
assert_equal(255, atol("0x_00_ff", 16))
assert_equal(18, atol("0b0001_0010", 2))
assert_equal(18, atol("0b_000_1001_0", 2))

# Negative cases
with assert_raises(
Expand Down Expand Up @@ -398,12 +401,37 @@ def test_atol():
):
_ = atol("5", 5)

with assert_raises(
contains="String is not convertible to integer with base 10: '0x_ff'"
):
_ = atol("0x_ff")

with assert_raises(
contains="String is not convertible to integer with base 3: '_12'"
):
_ = atol("_12", 3)

with assert_raises(contains="Base must be >= 2 and <= 36, or 0."):
_ = atol("0", 1)

with assert_raises(contains="Base must be >= 2 and <= 36, or 0."):
_ = atol("0", 37)

with assert_raises(
contains="String is not convertible to integer with base 16: '_ff'"
):
_ = atol("_ff", base=16)

with assert_raises(
contains="String is not convertible to integer with base 2: ' _01'"
):
_ = atol(" _01", base=2)

with assert_raises(
contains="String is not convertible to integer with base 10: '0x_ff'"
):
_ = atol("0x_ff")

with assert_raises(
contains="String is not convertible to integer with base 10: ''"
):
Expand Down Expand Up @@ -433,6 +461,14 @@ def test_atol_base_0():

assert_equal(0, atol("0X0", base=0))

assert_equal(255, atol("0x_00_ff", base=0))

assert_equal(18, atol("0b_0001_0010", base=0))
assert_equal(18, atol("0b000_1001_0", base=0))

assert_equal(10, atol("0o_000_12", base=0))
assert_equal(10, atol("0o00_12", base=0))

with assert_raises(
contains="String is not convertible to integer with base 0: ' 0x'"
):
Expand All @@ -453,11 +489,6 @@ def test_atol_base_0():
):
_ = atol("0r100", base=0)

with assert_raises(
contains="String is not convertible to integer with base 0: '0b_0'"
):
_ = atol("0b_0", base=0)

with assert_raises(
contains="String is not convertible to integer with base 0: '0xf__f'"
):
Expand Down

0 comments on commit f4a054a

Please sign in to comment.