Skip to content

Commit

Permalink
remove numpy dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
mammothb committed Aug 31, 2024
1 parent 8b45841 commit fa0ca70
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 191 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.py eol=lf
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@ editdistpy>=0.1.3
# For testing
coverage==7.4.4
importlib-resources>=6.3.2
numpy>=1.19.5
pytest==8.1.1
pytest-cov==4.1.0
379 changes: 189 additions & 190 deletions tests/test_editdistance.py
Original file line number Diff line number Diff line change
@@ -1,190 +1,189 @@
import sys
from itertools import combinations, permutations

import numpy as np
import pytest

from symspellpy.editdistance import (
AbstractDistanceComparer,
DamerauOsa,
DamerauOsaFast,
DistanceAlgorithm,
EditDistance,
Levenshtein,
LevenshteinFast,
)

SHORT_STRING = "string"
LONG_STRING = "long_string"
VERY_LONG_STRING = "very_long_string"


def expected_levenshtein(string_1, string_2, max_distance):
    """Compute the reference Levenshtein distance with a full DP table.

    Args:
        string_1: First string to compare.
        string_2: Second string to compare.
        max_distance: Largest distance of interest; capped at 2**31 - 1.

    Returns:
        The integer edit distance, or -1 when it exceeds ``max_distance``.
    """
    max_distance = int(min(2**31 - 1, max_distance))
    len_1 = len(string_1)
    len_2 = len(string_2)
    # Nested lists of ints keep every cell an integer; a float array would
    # make this function return values such as 3.0 instead of 3.
    d = [[0] * (len_2 + 1) for _ in range(len_1 + 1)]
    for i in range(len_1 + 1):
        d[i][0] = i
    for j in range(len_2 + 1):
        d[0][j] = j
    for j in range(1, len_2 + 1):
        for i in range(1, len_1 + 1):
            if string_1[i - 1] == string_2[j - 1]:
                # no operation
                d[i][j] = d[i - 1][j - 1]
            else:
                # deletion, insertion or substitution, each costing 1
                d[i][j] = min(d[i - 1][j], d[i][j - 1], d[i - 1][j - 1]) + 1
    distance = d[len_1][len_2]
    return distance if distance <= max_distance else -1


def expected_damerau_osa(string_1, string_2, max_distance):
    """Compute the reference Damerau (optimal string alignment) distance.

    Args:
        string_1: First string to compare.
        string_2: Second string to compare.
        max_distance: Largest distance of interest; capped at 2**31 - 1.

    Returns:
        The integer edit distance, or -1 when it exceeds ``max_distance``.
    """
    max_distance = int(min(2**31 - 1, max_distance))
    len_1 = len(string_1)
    len_2 = len(string_2)
    # Nested lists of ints keep every cell an integer; a float array would
    # make this function return values such as 1.0 instead of 1.
    d = [[0] * (len_2 + 1) for _ in range(len_1 + 1)]
    for i in range(len_1 + 1):
        d[i][0] = i
    for j in range(len_2 + 1):
        d[0][j] = j
    for i in range(1, len_1 + 1):
        for j in range(1, len_2 + 1):
            cost = 0 if string_1[i - 1] == string_2[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost)
            if (
                i > 1
                and j > 1
                and string_1[i - 1] == string_2[j - 2]
                and string_1[i - 2] == string_2[j - 1]
            ):
                # the last two characters are swapped: try a transposition
                d[i][j] = min(d[i][j], d[i - 2][j - 2] + cost)
    distance = d[len_1][len_2]
    return distance if distance <= max_distance else -1


@pytest.fixture(
    params=["damerau_osa", "levenshtein", "damerau_osa_fast", "levenshtein_fast"]
)
def get_comparer(request):
    """Yield the comparer under test and its reference function.

    The pair is selected by ``request.param``; all four comparers are
    instantiated each time the fixture runs.
    """
    names = ("damerau_osa", "levenshtein", "damerau_osa_fast", "levenshtein_fast")
    comparers = (DamerauOsa(), Levenshtein(), DamerauOsaFast(), LevenshteinFast())
    references = (
        expected_damerau_osa,
        expected_levenshtein,
        expected_damerau_osa,
        expected_levenshtein,
    )
    lookup = {
        name: {"actual": comparer, "expected": reference}
        for name, comparer, reference in zip(names, comparers, references)
    }
    chosen = lookup[request.param]
    yield chosen["actual"], chosen["expected"]


@pytest.fixture(
    params=["damerau_osa", "levenshtein", "damerau_osa_fast", "levenshtein_fast"]
)
def get_edit_distance(request):
    """Yield an ``EditDistance`` and the comparer class it should wrap.

    The pair is selected by ``request.param``; an ``EditDistance`` is
    constructed for every algorithm each time the fixture runs.
    """
    algorithms = (
        ("damerau_osa", DistanceAlgorithm.DAMERAU_OSA, DamerauOsa),
        ("levenshtein", DistanceAlgorithm.LEVENSHTEIN, Levenshtein),
        ("damerau_osa_fast", DistanceAlgorithm.DAMERAU_OSA_FAST, DamerauOsaFast),
        ("levenshtein_fast", DistanceAlgorithm.LEVENSHTEIN_FAST, LevenshteinFast),
    )
    lookup = {
        name: {"actual": EditDistance(algorithm), "expected": comparer_cls}
        for name, algorithm, comparer_cls in algorithms
    }
    chosen = lookup[request.param]
    yield chosen["actual"], chosen["expected"]


@pytest.fixture
def get_short_and_long_strings():
    """Return ``(string_1, string_2, expected)`` cases involving ``None``.

    ``expected`` maps the scenario keys "null", "zero" and "neg" to the
    distance each test expects for that pair of inputs.
    """

    def outcome(null, zero, neg):
        # A fresh dict per case, so no two cases share one object.
        return {"null": null, "zero": zero, "neg": neg}

    cases = []
    cases.append((SHORT_STRING, None, outcome(len(SHORT_STRING), -1, -1)))
    cases.append((LONG_STRING, None, outcome(-1, -1, -1)))
    cases.append((None, SHORT_STRING, outcome(len(SHORT_STRING), -1, -1)))
    cases.append((None, LONG_STRING, outcome(-1, -1, -1)))
    cases.append((SHORT_STRING, SHORT_STRING, outcome(0, 0, 0)))
    cases.append((None, None, outcome(0, 0, 0)))
    return cases


@pytest.fixture(params=[0, 1, 3, sys.maxsize])
def get_strings(request):
    """Yield every ordering of every subset of "abcd", plus a max distance.

    The list starts with the empty string, followed by the permutations of
    each combination of 1 to 4 letters; ``request.param`` is the max distance.
    """
    alphabet = "abcd"
    strings = [""]
    for size in range(1, len(alphabet) + 1):
        for subset in combinations(alphabet, size):
            strings.extend("".join(order) for order in permutations(subset))
    yield strings, request.param


class TestEditDistance:
    """Checks the edit-distance comparers against reference implementations."""

    def test_unknown_distance_algorithm(self):
        """An unsupported algorithm value is rejected with ``ValueError``."""
        with pytest.raises(ValueError) as excinfo:
            _ = EditDistance(2)
        assert "unknown distance algorithm" == str(excinfo.value)

    def test_abstract_distance_comparer(self):
        """The abstract comparer base class cannot be instantiated."""
        with pytest.raises(TypeError) as excinfo:
            # Instantiation is the statement expected to raise; any call
            # placed after it inside this block would never execute.
            AbstractDistanceComparer()
        assert str(excinfo.value).startswith(
            "Can't instantiate abstract class AbstractDistanceComparer"
        )

    def test_internal_distance_comparer(self, get_edit_distance):
        """``EditDistance`` wraps the comparer class matching its algorithm."""
        edit_distance, expected = get_edit_distance
        assert isinstance(edit_distance._distance_comparer, expected)

    def test_comparer_match_ref(self, get_comparer, get_strings):
        """Each comparer agrees with its reference on all string pairs."""
        comparer, expected = get_comparer
        strings, max_distance = get_strings

        for s1 in strings:
            for s2 in strings:
                assert expected(s1, s2, max_distance) == comparer.distance(
                    s1, s2, max_distance
                )

    def test_comparer_null_distance(self, get_comparer, get_short_and_long_strings):
        """``None`` inputs give the expected "null" results at max distance 10."""
        comparer, _ = get_comparer

        for s1, s2, expected in get_short_and_long_strings:
            distance = comparer.distance(s1, s2, 10)
            assert expected["null"] == distance

    def test_comparer_negative_max_distance(
        self, get_comparer, get_short_and_long_strings
    ):
        """Zero and negative max distances give the expected results."""
        comparer, _ = get_comparer

        for s1, s2, expected in get_short_and_long_strings:
            distance = comparer.distance(s1, s2, 0)
            assert expected["zero"] == distance

        # Use a genuinely negative limit here; passing 0 again would only
        # repeat the loop above and leave the negative case untested.
        for s1, s2, expected in get_short_and_long_strings:
            distance = comparer.distance(s1, s2, -1)
            assert expected["neg"] == distance

    def test_comparer_very_long_string(self, get_comparer):
        """A length gap larger than the max distance yields -1."""
        comparer, _ = get_comparer
        distance = comparer.distance(SHORT_STRING, VERY_LONG_STRING, 5)

        assert -1 == distance
import sys
from itertools import combinations, permutations

import pytest

from symspellpy.editdistance import (
AbstractDistanceComparer,
DamerauOsa,
DamerauOsaFast,
DistanceAlgorithm,
EditDistance,
Levenshtein,
LevenshteinFast,
)

SHORT_STRING = "string"
LONG_STRING = "long_string"
VERY_LONG_STRING = "very_long_string"


def expected_levenshtein(string_1, string_2, max_distance):
    """Reference Levenshtein distance computed with a full table.

    Returns the distance between ``string_1`` and ``string_2``, or -1 when
    it is larger than ``max_distance`` (which is capped at 2**31 - 1).
    """
    limit = int(min(2**31 - 1, max_distance))
    rows = len(string_1) + 1
    cols = len(string_2) + 1
    # Border cells hold the cost of building a prefix from the empty string.
    table = [
        [r + c if r == 0 or c == 0 else 0 for c in range(cols)] for r in range(rows)
    ]
    for c, char_2 in enumerate(string_2, start=1):
        for r, char_1 in enumerate(string_1, start=1):
            if char_1 == char_2:
                # matching characters cost nothing
                table[r][c] = table[r - 1][c - 1]
                continue
            table[r][c] = 1 + min(
                table[r - 1][c], table[r][c - 1], table[r - 1][c - 1]
            )
    result = table[-1][-1]
    if result > limit:
        return -1
    return result


def expected_damerau_osa(string_1, string_2, max_distance):
    """Reference Damerau (optimal string alignment) distance.

    Returns the distance between ``string_1`` and ``string_2``, or -1 when
    it is larger than ``max_distance`` (which is capped at 2**31 - 1).
    """
    limit = int(min(2**31 - 1, max_distance))
    rows = len(string_1) + 1
    cols = len(string_2) + 1
    # Border cells hold the cost of building a prefix from the empty string.
    table = [
        [r + c if r == 0 or c == 0 else 0 for c in range(cols)] for r in range(rows)
    ]
    for r in range(1, rows):
        char_1 = string_1[r - 1]
        for c in range(1, cols):
            char_2 = string_2[c - 1]
            step = int(char_1 != char_2)
            best = min(
                table[r - 1][c] + 1,
                table[r][c - 1] + 1,
                table[r - 1][c - 1] + step,
            )
            swapped = (
                r > 1
                and c > 1
                and char_1 == string_2[c - 2]
                and string_1[r - 2] == char_2
            )
            if swapped:
                # adjacent characters are transposed between the strings
                best = min(best, table[r - 2][c - 2] + step)
            table[r][c] = best
    result = table[-1][-1]
    return -1 if result > limit else result


@pytest.fixture(
    params=["damerau_osa", "levenshtein", "damerau_osa_fast", "levenshtein_fast"]
)
def get_comparer(request):
    """Yield the comparer under test and its reference function.

    The pair is selected by ``request.param``; all four comparers are
    instantiated each time the fixture runs.
    """
    names = ("damerau_osa", "levenshtein", "damerau_osa_fast", "levenshtein_fast")
    comparers = (DamerauOsa(), Levenshtein(), DamerauOsaFast(), LevenshteinFast())
    references = (
        expected_damerau_osa,
        expected_levenshtein,
        expected_damerau_osa,
        expected_levenshtein,
    )
    lookup = {}
    for name, comparer, reference in zip(names, comparers, references):
        lookup[name] = {"actual": comparer, "expected": reference}
    selected = lookup[request.param]
    yield selected["actual"], selected["expected"]


@pytest.fixture(
    params=["damerau_osa", "levenshtein", "damerau_osa_fast", "levenshtein_fast"]
)
def get_edit_distance(request):
    """Yield an ``EditDistance`` and the comparer class it should wrap.

    The pair is selected by ``request.param``; an ``EditDistance`` is
    constructed for every algorithm each time the fixture runs.
    """
    algorithms = (
        ("damerau_osa", DistanceAlgorithm.DAMERAU_OSA, DamerauOsa),
        ("levenshtein", DistanceAlgorithm.LEVENSHTEIN, Levenshtein),
        ("damerau_osa_fast", DistanceAlgorithm.DAMERAU_OSA_FAST, DamerauOsaFast),
        ("levenshtein_fast", DistanceAlgorithm.LEVENSHTEIN_FAST, LevenshteinFast),
    )
    lookup = {}
    for name, algorithm, comparer_cls in algorithms:
        lookup[name] = {"actual": EditDistance(algorithm), "expected": comparer_cls}
    selected = lookup[request.param]
    yield selected["actual"], selected["expected"]


@pytest.fixture
def get_short_and_long_strings():
    """Return ``(string_1, string_2, expected)`` cases involving ``None``.

    ``expected`` maps the scenario keys "null", "zero" and "neg" to the
    distance each test expects for that pair of inputs.
    """
    short_len = len(SHORT_STRING)
    inputs_and_results = (
        (SHORT_STRING, None, (short_len, -1, -1)),
        (LONG_STRING, None, (-1, -1, -1)),
        (None, SHORT_STRING, (short_len, -1, -1)),
        (None, LONG_STRING, (-1, -1, -1)),
        (SHORT_STRING, SHORT_STRING, (0, 0, 0)),
        (None, None, (0, 0, 0)),
    )
    # Build a separate dict for every case from its three expected values.
    return [
        (first, second, {"null": null, "zero": zero, "neg": neg})
        for first, second, (null, zero, neg) in inputs_and_results
    ]


@pytest.fixture(params=[0, 1, 3, sys.maxsize])
def get_strings(request):
    """Yield every ordering of every subset of "abcd", plus a max distance.

    The list starts with the empty string, followed by the permutations of
    each combination of 1 to 4 letters; ``request.param`` is the max distance.
    """
    alphabet = "abcd"
    strings = [""]
    size = 1
    while size <= len(alphabet):
        for subset in combinations(alphabet, size):
            for order in permutations(subset):
                strings.append("".join(order))
        size += 1
    yield strings, request.param


class TestEditDistance:
    """Checks the edit-distance comparers against reference implementations."""

    def test_unknown_distance_algorithm(self):
        """An unsupported algorithm value is rejected with ``ValueError``."""
        with pytest.raises(ValueError) as excinfo:
            _ = EditDistance(2)
        assert "unknown distance algorithm" == str(excinfo.value)

    def test_abstract_distance_comparer(self):
        """The abstract comparer base class cannot be instantiated."""
        with pytest.raises(TypeError) as excinfo:
            # Instantiation is the statement expected to raise; any call
            # placed after it inside this block would never execute.
            AbstractDistanceComparer()
        assert str(excinfo.value).startswith(
            "Can't instantiate abstract class AbstractDistanceComparer"
        )

    def test_internal_distance_comparer(self, get_edit_distance):
        """``EditDistance`` wraps the comparer class matching its algorithm."""
        edit_distance, expected = get_edit_distance
        assert isinstance(edit_distance._distance_comparer, expected)

    def test_comparer_match_ref(self, get_comparer, get_strings):
        """Each comparer agrees with its reference on all string pairs."""
        comparer, expected = get_comparer
        strings, max_distance = get_strings

        for s1 in strings:
            for s2 in strings:
                assert expected(s1, s2, max_distance) == comparer.distance(
                    s1, s2, max_distance
                )

    def test_comparer_null_distance(self, get_comparer, get_short_and_long_strings):
        """``None`` inputs give the expected "null" results at max distance 10."""
        comparer, _ = get_comparer

        for s1, s2, expected in get_short_and_long_strings:
            distance = comparer.distance(s1, s2, 10)
            assert expected["null"] == distance

    def test_comparer_negative_max_distance(
        self, get_comparer, get_short_and_long_strings
    ):
        """Zero and negative max distances give the expected results."""
        comparer, _ = get_comparer

        for s1, s2, expected in get_short_and_long_strings:
            distance = comparer.distance(s1, s2, 0)
            assert expected["zero"] == distance

        # Use a genuinely negative limit here; passing 0 again would only
        # repeat the loop above and leave the negative case untested.
        for s1, s2, expected in get_short_and_long_strings:
            distance = comparer.distance(s1, s2, -1)
            assert expected["neg"] == distance

    def test_comparer_very_long_string(self, get_comparer):
        """A length gap larger than the max distance yields -1."""
        comparer, _ = get_comparer
        distance = comparer.distance(SHORT_STRING, VERY_LONG_STRING, 5)

        assert -1 == distance

0 comments on commit fa0ca70

Please sign in to comment.