From e0946f22edf58c7b5ba20b70691cac1840e74f30 Mon Sep 17 00:00:00 2001 From: Joshua James Venter <25530194@sun.ac.za> Date: Fri, 5 Jul 2024 23:19:25 +0200 Subject: [PATCH] atol bug-fix for leading underscores - Support leading underscores (bug fix) for prefixed literals - Add relevant tests - Update docstring for clarity Signed-off-by: Joshua James Venter --- docs/changelog.md | 4 +++ stdlib/src/builtin/string.mojo | 43 +++++++++++++++++++--------- stdlib/test/builtin/test_string.mojo | 16 +++++++---- 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 961c586dee8..316183e83da 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -399,6 +399,10 @@ what we publish. - `LegacyPointer.load/store` are now removed. It's use is replaced with `__getitem__` or `__setitem__`. +- The `atol` function now correctly supports a leading underscore after the + base prefix (e.g. `atol("0x_ff", 0)`) of non-base-10 integer literals, as per + Python's [Integer Literals](https://docs.python.org/3/reference/lexical_analysis.html#integers). 
+ ### ❌ Removed - It is no longer possible to cast (implicitly or explicitly) from `Reference` diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo index 21bf6bf690c..5566091c186 100644 --- a/stdlib/src/builtin/string.mojo +++ b/stdlib/src/builtin/string.mojo @@ -287,17 +287,17 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int: # single underscores are only allowed between digits # starting "was_last_digit_undescore" to true such that # if the first digit is an undesrcore an error is raised - var was_last_digit_undescore = True + var was_last_digit_underscore = real_base == 10 for pos in range(start, str_len): var ord_current = int(buff[pos]) if ord_current == ord_underscore: - if was_last_digit_undescore: + if was_last_digit_underscore: raise Error(_atol_error(base, str_ref)) else: - was_last_digit_undescore = True + was_last_digit_underscore = True continue else: - was_last_digit_undescore = False + was_last_digit_underscore = False if ord_0 <= ord_current <= ord_num_max: result += ord_current - ord_0 found_valid_chars_after_start = True @@ -322,7 +322,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int: ) result = nextresult - if was_last_digit_undescore or (not found_valid_chars_after_start): + if was_last_digit_underscore or (not found_valid_chars_after_start): raise Error(_atol_error(base, str_ref)) if has_space_after_number: @@ -381,20 +381,37 @@ fn _identify_base(str_ref: StringRef, start: Int) -> Tuple[Int, Int]: fn atol(str: String, base: Int = 10) raises -> Int: """Parses and returns the given string as an integer in the given base. - For example, `atol("19")` returns `19`. If base is 0 the the string is - parsed as an Integer literal, see: https://docs.python.org/3/reference/lexical_analysis.html#integers. - - Raises: - If the given string cannot be parsed as an integer value. For example in - `atol("hi")`. 
+ If base is set to 0, the string is parsed as an Integer literal, with the + following considerations: + - '0b' or '0B' prefix indicates binary (base 2) + - '0o' or '0O' prefix indicates octal (base 8) + - '0x' or '0X' prefix indicates hexadecimal (base 16) + - Without a prefix, it's treated as decimal (base 10) Args: str: A string to be parsed as an integer in the given base. base: Base used for conversion, value must be between 2 and 36, or 0. Returns: - An integer value that represents the string, or otherwise raises. - """ + An integer value that represents the string. + + Raises: + If the given string cannot be parsed as an integer value or if an + incorrect base is provided. + + Examples: + >>> atol("32") + 32 + >>> atol("FF", 16) + 255 + >>> atol("0xFF", 0) + 255 + >>> atol("0b1010", 0) + 10 + + Notes: + This follows [Python's integer literals]( + https://docs.python.org/3/reference/lexical_analysis.html#integers).""" return _atol(str._strref_dangerous(), base) diff --git a/stdlib/test/builtin/test_string.mojo b/stdlib/test/builtin/test_string.mojo index 541ef040a38..9d53ee5aef7 100644 --- a/stdlib/test/builtin/test_string.mojo +++ b/stdlib/test/builtin/test_string.mojo @@ -365,6 +365,9 @@ def test_atol(): assert_equal(10, atol("0o12", 8)) assert_equal(10, atol("0O12", 8)) assert_equal(35, atol("Z", 36)) + assert_equal(255, atol("0x_00_ff", 16)) + assert_equal(18, atol("0b0001_0010", 2)) + assert_equal(18, atol("0b_000_1001_0", 2)) # Negative cases with assert_raises( @@ -433,6 +436,14 @@ def test_atol_base_0(): assert_equal(0, atol("0X0", base=0)) + assert_equal(255, atol("0x_00_ff", base=0)) + + assert_equal(18, atol("0b_0001_0010", base=0)) + assert_equal(18, atol("0b000_1001_0", base=0)) + + assert_equal(10, atol("0o_000_12", base=0)) + assert_equal(10, atol("0o00_12", base=0)) + with assert_raises( contains="String is not convertible to integer with base 0: ' 0x'" ): @@ -453,11 +464,6 @@ def test_atol_base_0(): ): _ = atol("0r100", base=0) - with 
assert_raises( - contains="String is not convertible to integer with base 0: '0b_0'" - ): - _ = atol("0b_0", base=0) - with assert_raises( contains="String is not convertible to integer with base 0: '0xf__f'" ):