From 99d83da2fae296288de9083e7d53afca14a1c9b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teppo=20Per=C3=A4?= Date: Mon, 5 Jun 2023 08:39:44 +0100 Subject: [PATCH 1/8] adding flatten module --- pygim-common/_pygim/_utils/__init__.py | 2 +- .../_utils/{ => _iterable}/_iterable.py | 53 ------------- .../_pygim/_utils/_iterable/flatten.cpp | 40 ++++++++++ .../_pygim/_utils/_iterable/flatten.h | 37 +++++++++ .../_pygim/_utils/_iterable/iterable_fast.cpp | 72 ++++++++++++++++++ .../_pygim/_utils/_iterable/iterutils.h | 72 ++++++++++++++++++ pygim-common/pygim/iterables/__init__.py | 2 +- pygim-common/pygim/iterables/flatten.py | 75 +++++++++++++++++++ 8 files changed, 298 insertions(+), 55 deletions(-) rename pygim-common/_pygim/_utils/{ => _iterable}/_iterable.py (55%) create mode 100644 pygim-common/_pygim/_utils/_iterable/flatten.cpp create mode 100644 pygim-common/_pygim/_utils/_iterable/flatten.h create mode 100644 pygim-common/_pygim/_utils/_iterable/iterable_fast.cpp create mode 100644 pygim-common/_pygim/_utils/_iterable/iterutils.h create mode 100644 pygim-common/pygim/iterables/flatten.py diff --git a/pygim-common/_pygim/_utils/__init__.py b/pygim-common/_pygim/_utils/__init__.py index 4444f38..b1cba69 100644 --- a/pygim-common/_pygim/_utils/__init__.py +++ b/pygim-common/_pygim/_utils/__init__.py @@ -3,7 +3,7 @@ Internal utilities package. ''' -from ._iterable import * +from ._iterable._iterable import * from ._inspect import * diff --git a/pygim-common/_pygim/_utils/_iterable.py b/pygim-common/_pygim/_utils/_iterable/_iterable.py similarity index 55% rename from pygim-common/_pygim/_utils/_iterable.py rename to pygim-common/_pygim/_utils/_iterable/_iterable.py index 93c5b21..6f494a7 100644 --- a/pygim-common/_pygim/_utils/_iterable.py +++ b/pygim-common/_pygim/_utils/_iterable/_iterable.py @@ -54,31 +54,7 @@ def split(iterable, condition): def is_container(obj): - """ - Determine whether an object is a container. - - A container is considered an object that contains other objects. This - function returns `False` for strings, bytes, and types, even though they - implement the iterator protocol. - - Parameters - ---------- - obj : `object` - The object to check. - - Returns - ------- - `bool` - `True` if `obj` is a container, `False` otherwise. - - Examples - -------- - >>> is_container("text") - False - >>> is_container(tuple()) - True - """ if isinstance(obj, (str, bytes, type)): return False @@ -89,36 +65,7 @@ def is_container(obj): def flatten(iterable): - """ - Flatten a nested iterable into a single list. - - This function flattens nested iterables such as lists, tuples, and sets - into a single list. It can handle deeply nested and irregular structures. - - Parameters - ---------- - iterable : `iterable` - The nested iterable to flatten. - Yields - ------ - `object` - The flattened objects from the nested iterable. - - Examples - -------- - Flatten a list of lists: - >>> list(flatten([[1, 2], [3, 4]])) - [1, 2, 3, 4] - - Flatten a deeply nested irregular list: - >>> list(flatten([[[1, 2]], [[[3]]], 4, 5, [[6, [7, 8]]]])) - [1, 2, 3, 4, 5, 6, 7, 8] - - Flatten a list of strings: - >>> list(flatten(["one", "two", ["three", "four"]])) - ['one', 'two', 'three', 'four'] - """ for subitem in iterable: if is_container(subitem): yield from flatten(subitem) diff --git a/pygim-common/_pygim/_utils/_iterable/flatten.cpp b/pygim-common/_pygim/_utils/_iterable/flatten.cpp new file mode 100644 index 0000000..c1fe54d --- /dev/null +++ b/pygim-common/_pygim/_utils/_iterable/flatten.cpp @@ -0,0 +1,40 @@ +#include +#include +#include +#include // std::string + +#include "flatten.h" +#include "iterutils.h" + +FlattenGenerator::FlattenGenerator() {} + +FlattenGenerator::FlattenGenerator(py::iterator items) { + iterators.push_back(items); +} + +bool FlattenGenerator::isComplete() { + while (!iterators.empty() && iterators.back() == py::iterator::sentinel()) { + iterators.pop_back(); + } + return iterators.empty(); +} + +py::handle FlattenGenerator::next() { + // std::cout << "-> next()" << std::endl; + py::iterator &it = iterators.back(); + auto last = *it; + ++it; + + if (is_container(last)) { + // std::cout << "it's a list! " << std::endl; + iterators.push_back(py::iter(last)); + if (!isComplete()) { + return next(); + } else { + throw py::stop_iteration(); + } + } else { + // std::cout << "<- next() " << std::endl; + return last; + } +} diff --git a/pygim-common/_pygim/_utils/_iterable/flatten.h b/pygim-common/_pygim/_utils/_iterable/flatten.h new file mode 100644 index 0000000..0666e7a --- /dev/null +++ b/pygim-common/_pygim/_utils/_iterable/flatten.h @@ -0,0 +1,37 @@ +#ifndef FLATTEN_GENERATOR_H +#define FLATTEN_GENERATOR_H + +#include +#include + +#include "iterutils.h" + +namespace py = pybind11; + +inline py::iterator _ensure_iter(py::handle obj) { + if (py::isinstance(obj)) { + return obj.cast(); + } + + if (!is_container(obj)) { + return py::iter(tuplify(obj)); + } + + return py::iter(obj); +}; + + +class FlattenGenerator { +public: + FlattenGenerator(); + FlattenGenerator(py::iterator items); + + bool isComplete(); + + py::handle next(); + +private: + std::vector iterators; +}; + +#endif // FLATTEN_GENERATOR_H diff --git a/pygim-common/_pygim/_utils/_iterable/iterable_fast.cpp b/pygim-common/_pygim/_utils/_iterable/iterable_fast.cpp new file mode 100644 index 0000000..3080f0f --- /dev/null +++ b/pygim-common/_pygim/_utils/_iterable/iterable_fast.cpp @@ -0,0 +1,72 @@ +#include +#include + +#include "flatten.h" +#include "iterutils.h" +#include // std::string + +#define STRINGIFY(x) #x +#define MACRO_STRINGIFY(x) STRINGIFY(x) + +namespace py = pybind11; + +PYBIND11_MODULE(fast_iterable, m) +{ + m.doc() = "Module of fast iterables."; // optional module docstring + + // is_container + m.def("is_container", (bool (*)(const py::str&)) &is_container, "A function that checks if a Python str is a container."); + m.def("is_container", (bool (*)(const py::bytes&)) &is_container, "A function that checks if a Python bytes is a container."); + m.def("is_container", (bool (*)(const py::iterable&)) &is_container, "A function that converts an iterable to a tuple."); + m.def("is_container", (bool (*)(const py::memoryview&)) &is_container, "A function that checks if a Python memoryview is a container."); + m.def("is_container", (bool (*)(const py::handle&)) &is_container, "A generic function that checks if a Python object is a container."); + + // tuplify + m.def("tuplify", (py::tuple (*)(const py::bytes&)) &tuplify, "A function that converts a bytes object to a single-element tuple"); + m.def("tuplify", (py::tuple (*)(const py::str&)) &tuplify, "A function that converts a string object to a single-element tuple."); + m.def("tuplify", (py::tuple (*)(const py::tuple&)) &tuplify, "A function that converts a tuple to a tuple."); + m.def("tuplify", (py::tuple (*)(const py::dict&)) &tuplify, "A function that converts a dict to a tuple of key-value pairs."); + m.def("tuplify", (py::tuple (*)(const py::iterable&)) &tuplify, "A function that converts an iterable to a tuple."); + m.def("tuplify", (py::tuple (*)(const py::handle&)) &tuplify, "A function that converts a generic object to a single-element tuple."); + m.def("flatten_simple", [](py::iterable objects) { + py::iterator it = py::iter(objects); + py::list results; + for (; it != py::iterator::sentinel(); ++it) { + if (py::isinstance(*it)) { + py::iterator it2 = py::iter(*it); + for (; it2 != py::iterator::sentinel(); ++it2) { + results.append(*it2); + } + } else { + results.append(*it); + } + } + return results; + }); + py::class_(m, "flatten") + .def(py::init([](py::object objs) { return new FlattenGenerator(_ensure_iter(objs)); })) + .def("__iter__", [](const py::object &self) + { return self; }) + .def("__next__", + [](FlattenGenerator *self) + { + // std::cout << "-> __next__" << std::endl; + if (self->isComplete()) + { + // std::cout << "<- __next__ (complete)" << std::endl; + throw py::stop_iteration(); + } + + //py::gil_scoped_release release; + auto result = self->next(); + + // std::cout << "<- next" << std::endl; + return result; + }); + +#ifdef VERSION_INFO + m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); +#else + m.attr("__version__") = "dev"; +#endif +} \ No newline at end of file diff --git a/pygim-common/_pygim/_utils/_iterable/iterutils.h b/pygim-common/_pygim/_utils/_iterable/iterutils.h new file mode 100644 index 0000000..1f631e0 --- /dev/null +++ b/pygim-common/_pygim/_utils/_iterable/iterutils.h @@ -0,0 +1,72 @@ +#pragma once + +#include +#include // std::string + +namespace py = pybind11; +/* +inline bool is_container(py::handle obj) { + if (py::isinstance(obj) || py::isinstance(obj)) { + return false; + } + + if (py::hasattr(obj, "__iter__")) { + return true; + } + + return py::isinstance(obj); +}; +*/ +// Base case function template for generic types +template +inline std::enable_if_t && !std::is_same_v && !std::is_same_v, bool> +is_container(const T& obj) { + if (py::hasattr(obj, "__iter__")) { + return true; + } + return false; +} + +// Specialization for py::str and py::bytes +template +inline std::enable_if_t || std::is_same_v, bool> +is_container(const T& obj) { + return false; +} + +// Specialization for py::memoryview +template +inline std::enable_if_t, bool> +is_container(const T& obj) { + return true; +} + + + +inline py::tuple tuplify(const py::tuple& arg) { + return arg; +}; + +inline py::tuple tuplify(const py::dict& arg) { + py::list kv_pairs; + for (const auto& item : arg) { + kv_pairs.append(py::make_tuple(item.first, item.second)); + } + return py::tuple(kv_pairs); +}; + +inline py::tuple tuplify(const py::iterable& arg) { + return py::tuple(arg); +}; + +inline py::tuple tuplify(const py::handle& arg) { + return py::make_tuple(arg); +}; + +inline py::tuple tuplify(const py::str& arg) { + return py::make_tuple(arg); +}; + +inline py::tuple tuplify(const py::bytes& arg) { + return py::make_tuple(arg); +}; \ No newline at end of file diff --git a/pygim-common/pygim/iterables/__init__.py b/pygim-common/pygim/iterables/__init__.py index d1fadba..66052e0 100644 --- a/pygim-common/pygim/iterables/__init__.py +++ b/pygim-common/pygim/iterables/__init__.py @@ -17,7 +17,7 @@ """ -from _pygim._utils._iterable import flatten, is_container, split +from .flatten import * __all__ = [ "flatten", diff --git a/pygim-common/pygim/iterables/flatten.py b/pygim-common/pygim/iterables/flatten.py new file mode 100644 index 0000000..060b4a1 --- /dev/null +++ b/pygim-common/pygim/iterables/flatten.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +""" + +""" + +try: + from _pygim._utils import iterable_fast as iterables +except ImportError: + from _pygim._utils import _iterable as iterables + + +is_container = iterables.is_container +is_container.__doc__ = """ + Determine whether an object is a container. + + A container is considered an object that contains other objects. This + function returns `False` for strings, bytes, and types, even though they + implement the iterator protocol. + + Parameters + ---------- + obj : `object` + The object to check. + + Returns + ------- + `bool` + `True` if `obj` is a container, `False` otherwise. + + Examples + -------- + >>> from pygim.iterables import is_container + >>> is_container("text") + False + + >>> is_container(tuple()) + True +""".split() + + +flatten = iterables.flatten +flatten.__doc__ = """ + Flatten a nested iterable into a single list. + + This function flattens nested iterables such as lists, tuples, and sets + into a single list. It can handle deeply nested and irregular structures. + + Parameters + ---------- + iterable : `iterable` + The nested iterable to flatten. + + Yields + ------ + `object` + The flattened objects from the nested iterable. + + Examples + -------- + Flatten a list of lists: + >>> from pygim.iterables import flatten + >>> list(flatten([[1, 2], [3, 4]])) + [1, 2, 3, 4] + + Flatten a deeply nested irregular list: + >>> list(flatten([[[1, 2]], [[[3]]], 4, 5, [[6, [7, 8]]]])) + [1, 2, 3, 4, 5, 6, 7, 8] + + Flatten a list of strings: + >>> list(flatten(["one", "two", ["three", "four"]])) + ['one', 'two', 'three', 'four'] +""".strip() + + +__all__ = ["flatten", "is_container"] From f334bf37e4e80a447b9647883e959ec7c2b482c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teppo=20Per=C3=A4?= Date: Sat, 10 Jun 2023 16:35:43 +0100 Subject: [PATCH 2/8] adding tests --- .../{test_iterable.py => test_flatten.py} | 59 ++++++++++++---- .../tests/unit_tests/test_iterutils.py | 69 +++++++++++++++++++ 2 files changed, 114 insertions(+), 14 deletions(-) rename pygim-common/tests/unit_tests/{test_iterable.py => test_flatten.py} (52%) create mode 100644 pygim-common/tests/unit_tests/test_iterutils.py diff --git a/pygim-common/tests/unit_tests/test_iterable.py b/pygim-common/tests/unit_tests/test_flatten.py similarity index 52% rename from pygim-common/tests/unit_tests/test_iterable.py rename to pygim-common/tests/unit_tests/test_flatten.py index 01735f8..1020d0f 100644 --- a/pygim-common/tests/unit_tests/test_iterable.py +++ b/pygim-common/tests/unit_tests/test_flatten.py @@ -11,13 +11,24 @@ def equals(left: object, right): return left == right +t = tuple FLATTEN_TESTS = [ + ([], []), ([1, 2, 3], [1, 2, 3]), ([[1], [2], [3]], [1, 2, 3]), ([[[[1]]], [[[2]]], [[[3]]]], [1, 2, 3]), (set([1, 2, 3]), [1, 2, 3]), ([set([1]), [[[2]]], 3], [1, 2, 3]), + (tuple([1, 2, 3]), [1, 2, 3]), + ([1, [], 2], [1, 2]), + ((1,2,(3,4)), [1,2,3,4]), + ((1,2,set([3,4])), [1,2,3,4]), + (range(10000), list(range(10000))), + ([[[[[[[[[[[]]]]]]]]]]], []), + ([[[[[[[[[[[1]]]]]]]]]]], [1]), + (set([t([t([t([])])])]), []), + (set([t([t([t([1])])])]), [1]), (["one", 2, 3], ["one", 2, 3]), ([["one"], [2], [3]], ["one", 2, 3]), @@ -36,14 +47,34 @@ def equals(left: object, right): ([[[[str]]], [[[None]]], [[[False]]]], [str, None, False]), (set([str, None, False]), [str, None, False]), ([set([str]), [[[None]]], False], [str, None, False]), + (iter([1, 2, 3]), [1, 2, 3]), + ((i for i in range(1, 4)), [1, 2, 3]), + ({"a": 1}, [("a", 1)]), + + (["keep as is"], ["keep as is"]), + ([b"keep as is"], [b"keep as is"]), + (memoryview(b"keep as is"), [b"keep as is"]), + (1, [1]), + (123.456, [123.456]), + (complex(1, 2), [complex(1, 2)]), + (None, [None]), + (True, [True]), ] -@pytest.mark.parametrize("input,expected_output", FLATTEN_TESTS) -def test_flatten(input, expected_output): - expected_output = input.__class__(expected_output) - actual_result = input.__class__(flatten(input)) - if not equals(actual_result, expected_output): - assert False +@pytest.mark.parametrize("input,expected_result", FLATTEN_TESTS) +def test_flatten(input, expected_result): + try: + expected_result = input.__class__(expected_result) + except (TypeError, ValueError): + expected_result = list(expected_result) + + actual_result = flatten(input) + try: + actual_result = input.__class__(actual_result) + except (TypeError, ValueError): + actual_result = list(actual_result) + if not equals(actual_result, expected_result): + assert False, f"Results differ:\n ACTUAL: {list(flatten(input))}\nEXPECTED: {expected_result} " IS_CONTAINER_TESTS = [ @@ -77,11 +108,11 @@ def test_flatten(input, expected_output): (dict(one=1), True), ] -@pytest.mark.parametrize("input,expected_output", IS_CONTAINER_TESTS) -def test_is_container(input, expected_output): +@pytest.mark.parametrize("input,expected_result", IS_CONTAINER_TESTS) +def test_is_container(input, expected_result): actual_result = is_container(input) - if not equals(actual_result, expected_output): - assert False, f"{type(input)} is not {expected_output}" + if not equals(actual_result, expected_result): + assert False, f"{type(input)} is not {expected_result}" SPLIT_TESTS = [ @@ -90,12 +121,12 @@ def test_is_container(input, expected_output): ([], lambda v: v <= 2, ([], [])), ] -@pytest.mark.parametrize("input,func,expected_output", SPLIT_TESTS) -def test_split(input, func, expected_output): +@pytest.mark.parametrize("input,func,expected_result", SPLIT_TESTS) +def test_split(input, func, expected_result): actual_result = split(input, func) - if not equals(actual_result, expected_output): - assert False, f"{actual_result} != {expected_output}" + if not equals(actual_result, expected_result): + assert False, f"{actual_result} != {expected_result}" if __name__ == "__main__": diff --git a/pygim-common/tests/unit_tests/test_iterutils.py b/pygim-common/tests/unit_tests/test_iterutils.py new file mode 100644 index 0000000..fc2505c --- /dev/null +++ b/pygim-common/tests/unit_tests/test_iterutils.py @@ -0,0 +1,69 @@ +#type: ignore +import pytest + +from pygim.utils.fast_iterable import tuplify, is_container + + +class CustomIterableObject: + def __iter__(self): + return [] + +class CustomNonIterableObject: + pass + + +@pytest.mark.parametrize("input,expected_result", [ + ((1, 2, 3), True), # Tuple is a container + ([1, 2, 3], True), # List is a container + (set([1, 2, 3]), True), # Set is a container + (range(1, 4), True), # Range is a container + ("123", False), # String is not considered a container + (b"123", False), # Byte string is not considered a container + (123, False), # Integer is not a container + (123.456, False), # Float is not a container + (None, False), # None is not a container + (True, False), # Boolean is not a container + ({"a": 1}, True), # Dictionary is a container + (complex(1, 2), False), # Complex number is not a container + (iter([1, 2, 3]), True), # Iterator is a container + ((i for i in range(1, 4)), True), # Generator is a container + (memoryview(b"123"), True), # Memoryview is a container + (CustomIterableObject(), True), # Custom iterable object is a container + (CustomNonIterableObject(), False), # Custom non-iterable object is not a container + ([[1, 2, 3]], True), # List of lists is a container + ({'a': set([1, 2, 3])}, True), # Dictionary of sets is a container +]) + +def test_is_container_with_various_types(input, expected_result): + actual_result = is_container(input) + + if actual_result != expected_result: + assert False, f"Results differ:\n ACTUAL: {actual_result}\nEXPECTED: {expected_result} " + + +@pytest.mark.parametrize("input,expected_result", [ + ((1, 2, 3), (1, 2, 3)), # Tuple remains unchanged + ([1, 2, 3], (1, 2, 3)), # List gets converted to tuple + (set([1, 2, 3]), (1, 2, 3)), # Set gets converted to tuple + (range(1, 4), (1, 2, 3)), # Range gets converted to tuple + ("123", ("123",)), # String remains as single-element tuple + (b"123", (b"123",)), # Byte string remains as single-element tuple + (123, (123,)), # Integer remains as single-element tuple + (123.456, (123.456,)), # Float remains as single-element tuple + (None, (None,)), # None remains as single-element tuple + (True, (True,)), # Boolean remains as single-element tuple + ({"a": 1}, (("a", 1),)), # Dictionary remains as single-element tuple + (complex(1, 2), (complex(1, 2),)), # Complex number remains as single-element tuple + (iter([1, 2, 3]), (1, 2, 3)), # Iterable gets converted to tuple + ((i for i in range(1, 4)), (1, 2, 3)), # Generator gets converted to tuple +]) +def test_tuplify_with_various_types(input, expected_result): + actual_result = tuplify(input) + + if actual_result != expected_result: + assert False, f"Results differ:\n ACTUAL: {actual_result}\nEXPECTED: {expected_result} " + + +if __name__ == "__main__": + import pytest + pytest.main([__file__]) From bb5f12f1e5e9155df6c70a60f3729d1b65da6e41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teppo=20Per=C3=A4?= Date: Sat, 10 Jun 2023 16:42:45 +0100 Subject: [PATCH 3/8] moving around --- .../_pygim/_utils/_iterable/__init__.py | 0 pygim-common/tests/unit_tests/test_flatten.py | 55 ++----------------- .../tests/unit_tests/test_iterutils.py | 55 +++++++++++++++++++ 3 files changed, 59 insertions(+), 51 deletions(-) create mode 100644 pygim-common/_pygim/_utils/_iterable/__init__.py diff --git a/pygim-common/_pygim/_utils/_iterable/__init__.py b/pygim-common/_pygim/_utils/_iterable/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pygim-common/tests/unit_tests/test_flatten.py b/pygim-common/tests/unit_tests/test_flatten.py index 1020d0f..88b7000 100644 --- a/pygim-common/tests/unit_tests/test_flatten.py +++ b/pygim-common/tests/unit_tests/test_flatten.py @@ -5,7 +5,10 @@ from functools import singledispatch import pytest -from pygim.iterables import flatten, is_container, split +from _pygim._utils._iterable._iterable import flatten +from _pygim._utils._iterable.fast_iterable import flatten as flatten_fast + + @singledispatch def equals(left: object, right): @@ -77,56 +80,6 @@ def test_flatten(input, expected_result): assert False, f"Results differ:\n ACTUAL: {list(flatten(input))}\nEXPECTED: {expected_result} " -IS_CONTAINER_TESTS = [ - (str, False), - (bytes, False), - (bytearray, False), - (memoryview, False), - (range, False), - (list, False), - (tuple, False), - (int, False), - (float, False), - (complex, False), - (set, False), - (frozenset, False), - (dict, False), - - # Various instances - ('text', False), - (b'text', False), - (bytearray([1,2,3]), True), - (memoryview(bytearray([1,2,3])), True), - (range(100), True), - ([1,2,3], True), - ((1,2,3), True), - (42, False), - (42.42, False), - (complex(42, 42), False), - (set([1, 2, 3]), True), - (frozenset([1, 2, 3]), True), - (dict(one=1), True), -] - -@pytest.mark.parametrize("input,expected_result", IS_CONTAINER_TESTS) -def test_is_container(input, expected_result): - actual_result = is_container(input) - if not equals(actual_result, expected_result): - assert False, f"{type(input)} is not {expected_result}" - - -SPLIT_TESTS = [ - ([1, 2, 3, 4], lambda v: v % 2, ([1, 3], [2, 4])), - ([1, 2, 3, 4], lambda v: v <= 2, ([1, 2], [3, 4])), - ([], lambda v: v <= 2, ([], [])), -] - -@pytest.mark.parametrize("input,func,expected_result", SPLIT_TESTS) -def test_split(input, func, expected_result): - - actual_result = split(input, func) - if not equals(actual_result, expected_result): - assert False, f"{actual_result} != {expected_result}" if __name__ == "__main__": diff --git a/pygim-common/tests/unit_tests/test_iterutils.py b/pygim-common/tests/unit_tests/test_iterutils.py index fc2505c..8b9c73d 100644 --- a/pygim-common/tests/unit_tests/test_iterutils.py +++ b/pygim-common/tests/unit_tests/test_iterutils.py @@ -11,6 +11,61 @@ def __iter__(self): class CustomNonIterableObject: pass +''' + + +IS_CONTAINER_TESTS = [ + (str, False), + (bytes, False), + (bytearray, False), + (memoryview, False), + (range, False), + (list, False), + (tuple, False), + (int, False), + (float, False), + (complex, False), + (set, False), + (frozenset, False), + (dict, False), + + # Various instances + ('text', False), + (b'text', False), + (bytearray([1,2,3]), True), + (memoryview(bytearray([1,2,3])), True), + (range(100), True), + ([1,2,3], True), + ((1,2,3), True), + (42, False), + (42.42, False), + (complex(42, 42), False), + (set([1, 2, 3]), True), + (frozenset([1, 2, 3]), True), + (dict(one=1), True), +] + +@pytest.mark.parametrize("input,expected_result", IS_CONTAINER_TESTS) +def test_is_container(input, expected_result): + actual_result = is_container(input) + if not equals(actual_result, expected_result): + assert False, f"{type(input)} is not {expected_result}" + + +SPLIT_TESTS = [ + ([1, 2, 3, 4], lambda v: v % 2, ([1, 3], [2, 4])), + ([1, 2, 3, 4], lambda v: v <= 2, ([1, 2], [3, 4])), + ([], lambda v: v <= 2, ([], [])), +] + +@pytest.mark.parametrize("input,func,expected_result", SPLIT_TESTS) +def test_split(input, func, expected_result): + + actual_result = split(input, func) + if not equals(actual_result, expected_result): + assert False, f"{actual_result} != {expected_result}" + +''' @pytest.mark.parametrize("input,expected_result", [ ((1, 2, 3), True), # Tuple is a container From 4c390afeb28777b4eb12bac1e5e76a5bf4681b5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teppo=20Per=C3=A4?= Date: Sat, 10 Jun 2023 18:15:20 +0100 Subject: [PATCH 4/8] adding setup.py --- pygim-common/setup.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 pygim-common/setup.py diff --git a/pygim-common/setup.py b/pygim-common/setup.py new file mode 100644 index 0000000..42dc489 --- /dev/null +++ b/pygim-common/setup.py @@ -0,0 +1,38 @@ +#type: ignore +import importlib +from pathlib import Path + +# Available at setup time due to pyproject.toml +from pybind11.setup_helpers import Pybind11Extension, build_ext +from setuptools import setup,find_packages +import toml + +ROOT = Path(__file__).parent +version_file = ROOT / "pygim/__version__.py" + +spec = importlib.util.spec_from_file_location(version_file) +version_module = importlib.util.module_from_spec(spec) +spec.loader.exec_module(version_module) +__version__ = version_module.__version__ + +pyproject = toml.loads(Path('pyproject.toml').read_text()) + + +ext_modules = [ + Pybind11Extension("utils.fast_iterable", + [ + "_pygim/_utils/iterable_fast.cpp", + "_pygim/_utils/flatten.cpp", + ], + # Example: passing in the version to the compiled code + define_macros = [('VERSION_INFO', __version__)], + ), +] + +cfg = {**pyproject["project"]} +cfg['packages']=find_packages('pygim') +cfg['package_dir']={'': 'pygim'} +cfg['ext_modules']=ext_modules +cfg['cmdclass']={"build_ext": build_ext} + +setup(**cfg) \ No newline at end of file From afbad53d60928a35d4cbc5bc95e23047bc08b745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teppo=20Per=C3=A4?= Date: Sun, 2 Jul 2023 16:23:02 +0100 Subject: [PATCH 5/8] snapshot --- pygim-common/_pygim/_utils/_inspect.py | 2 +- .../_pygim/_utils/_iterable/_iterable.py | 15 +-- pygim-common/pygim/fileio/pathset.py | 112 ++++++++++++++---- pygim-common/pyproject.toml | 4 +- pygim-common/setup.py | 23 ++-- pygim-common/tests/unit_tests/test_pathset.py | 1 + 6 files changed, 109 insertions(+), 48 deletions(-) diff --git a/pygim-common/_pygim/_utils/_inspect.py b/pygim-common/_pygim/_utils/_inspect.py index 6df9df2..d15925d 100644 --- a/pygim-common/_pygim/_utils/_inspect.py +++ b/pygim-common/_pygim/_utils/_inspect.py @@ -6,7 +6,7 @@ import inspect import types -from ._iterable import flatten +from ._iterable._iterable import flatten __all__ = ('TraitFunctions', 'has_instances', 'is_subset') diff --git a/pygim-common/_pygim/_utils/_iterable/_iterable.py b/pygim-common/_pygim/_utils/_iterable/_iterable.py index 6f494a7..14d3e08 100644 --- a/pygim-common/_pygim/_utils/_iterable/_iterable.py +++ b/pygim-common/_pygim/_utils/_iterable/_iterable.py @@ -3,6 +3,8 @@ This module contains internal utility functions. """ +from pathlib import Path + __all__ = ("split", "flatten", "is_container") @@ -55,7 +57,7 @@ def split(iterable, condition): def is_container(obj): - if isinstance(obj, (str, bytes, type)): + if isinstance(obj, (str, bytes, type, Path)): return False if hasattr(obj, "__iter__"): @@ -65,9 +67,8 @@ def is_container(obj): def flatten(iterable): - - for subitem in iterable: - if is_container(subitem): - yield from flatten(subitem) - else: - yield subitem + if is_container(iterable): + for o in iterable: + yield from flatten(o) + else: + yield iterable diff --git a/pygim-common/pygim/fileio/pathset.py b/pygim-common/pygim/fileio/pathset.py index c8f8faa..5c0d754 100644 --- a/pygim-common/pygim/fileio/pathset.py +++ b/pygim-common/pygim/fileio/pathset.py @@ -9,17 +9,22 @@ from _pygim._utils import is_container, flatten +__all__ = ["PathSet"] -def _flatten_paths(paths): - for path in flatten(paths): - path = Path(path) - if path.is_dir(): - yield path - for p in _flatten_paths(path.glob("*")): +def flatten_paths(paths, pattern): + if isinstance(paths, Path): + if paths.is_dir(): + yield paths + for p in flatten_paths(paths.glob(pattern), pattern): yield p else: - yield path + yield paths + else: + assert is_container(paths), f'Expected `iterable` got `{type(paths).__name__}`' + + for path in flatten(paths): + yield from flatten_paths(Path(path), pattern) class _FileSystemOps: @@ -45,25 +50,46 @@ def delete_all(self): @dataclass(frozen=True) class PathSet: """ - This class encapsulates manipulation of multiple path objects at once. - - Overview (further info in function docs): - - len(PathSet()) provides the total number of files and directories read recursively. - - list(PathSet()) provides a list of all Path objects in the list. - - bool(PathSet()) tells whether there are any Path objects in the list. - - repr(PathSet()) provides a nice string representation of this object. - - PathSet.prefixed() creates a new PathSet with another path as a prefix (e.g., folder+files). - - PathSet() + PathSet() creates a new object containing Path objects from both sets. - - PathSet().clone() creates an identical copy of the list. - - PathSet().filter() generator that yields Path objects whose properties match the filters. - - PathSet().drop() generator that yields Path objects whose properties do NOT match the filters. - - PathSet().filtered() as above, but returns a new PathSet object. - - PathSet().dirs() a shorthand for a list of directories. - - PathSet().files() a shorthand for a list of files. - - PathSet().by_suffix() a shorthand for filtering by suffix(es). - - PathSet().delete_all() deletes all contained Path objects from the file system. - + A class for manipulating multiple Path objects at once. + + Methods + ------- + prefixed(prefix : str) -> PathSet + Returns a new PathSet with `prefix` added to each path. + clone() -> PathSet + Returns a new PathSet that is a copy of this one. + filter(filter : callable) -> Generator + Yields paths from this set that pass a filter function. + drop(filter : callable) -> Generator + Yields paths from this set that do not pass a filter function. + filtered(filter : callable) -> PathSet + Returns a new PathSet of paths from this set that pass a filter function. + dirs() -> List + Returns a list of directories in this set. + files() -> List + Returns a list of files in this set. + by_suffix(suffix : str) -> PathSet + Returns a new PathSet of paths from this set with a given suffix. + FS.delete_all() -> None + Deletes all paths in this set from the file system. + transform(container_type : type = list, path_type : type = str) -> container_type + Returns a new container of `container_type` with elements of `path_type`. + + Examples + -------- + >>> paths = PathSet([Path('path1'), Path('path2')]) + >>> len(paths) + 2 + >>> bool(paths) + True + >>> list(paths) + [Path('path1'), Path('path2')] + >>> repr(paths) + "PathSet([Path('path1'), Path('path2')])" + >>> paths.prefixed("/new") + PathSet([Path('/new/path1'), Path('/new/path2')]) """ + # TODO: This class could allow multiple different path types (not just pathlib.Path). _paths: Path = None # type: ignore # this is invariant _pattern: str = "*" @@ -78,7 +104,7 @@ def __post_init__(self): # We just handled the optional part, let's make mypy happy. assert paths is not None - super().__setattr__("_paths", frozenset(_flatten_paths([paths]))) + super().__setattr__("_paths", frozenset(flatten_paths([paths], self._paths))) assert all([isinstance(p, Path) for p in self._paths]) assert isinstance(self._paths, frozenset) @@ -266,6 +292,40 @@ def __add__(self, other): assert isinstance(other, self.__class__) return self.clone(set(self._paths) | set(other._paths)) + def transform(self, container_type=list, path_type=str): + """ + Transform the container and elements of the instance to specified types. + + This function transforms the elements of the instance using the + `path_type` argument, and then packs them into a new container + specified by the `container_type` argument. + + Parameters + ---------- + container_type : type, optional + The type of the output container (default is `list`). This should + be a type (like `list` or `set`), not an instance of a type (like `[]` or `{}`). + path_type : type, optional + The type to convert each path in the instance (default is `str`). + This should be a callable that takes a path as input and returns + a new path of the desired type. + + Returns + ------- + container_type + The container filled with `path_type` objects. + + Examples + -------- + Given a class `PathSet` that holds a list of `Path` objects: + + >>> paths = PathSet([Path('path1'), Path('path2')]) + >>> transformed = paths.transform(container_type=set, path_type=str) + >>> print(transformed) + {'path1', 'path2'} + """ + return container_type(path_type(p) for p in self) + if __name__ == "__main__": import doctest diff --git a/pygim-common/pyproject.toml b/pygim-common/pyproject.toml index d6d8113..114c61e 100644 --- a/pygim-common/pyproject.toml +++ b/pygim-common/pyproject.toml @@ -1,6 +1,6 @@ [build-system] -requires = ["setuptools ~= 58.0", "wheel", "pdm-backend"] -build-backend = "pdm.backend" +requires = ["setuptools ~= 58.0", "wheel", "pdm-backend", "pybind11", "toml"] +#build-backend = "pdm.backend" [project] name = "pygim-common" diff --git a/pygim-common/setup.py b/pygim-common/setup.py index 42dc489..4969cc1 100644 --- a/pygim-common/setup.py +++ b/pygim-common/setup.py @@ -1,5 +1,5 @@ #type: ignore -import importlib +import sys from pathlib import Path # Available at setup time due to pyproject.toml @@ -8,27 +8,26 @@ import toml ROOT = Path(__file__).parent -version_file = ROOT / "pygim/__version__.py" +sys.path.append(str(ROOT / "pygim")) -spec = importlib.util.spec_from_file_location(version_file) -version_module = importlib.util.module_from_spec(spec) -spec.loader.exec_module(version_module) -__version__ = version_module.__version__ +from __version__ import __version__ +from pygim.fileio.pathset import PathSet pyproject = toml.loads(Path('pyproject.toml').read_text()) +print("===================") +#print(PathSet(ROOT, "*.cpp").transform()) +""" ext_modules = [ - Pybind11Extension("utils.fast_iterable", - [ - "_pygim/_utils/iterable_fast.cpp", - "_pygim/_utils/flatten.cpp", - ], + Pybind11Extension("_pygim._pygim_common_fast", + #PathSet(ROOT, "*.cpp").transform(), # Example: passing in the version to the compiled code define_macros = [('VERSION_INFO', __version__)], ), ] - +""" +ext_modules = [] cfg = {**pyproject["project"]} cfg['packages']=find_packages('pygim') cfg['package_dir']={'': 'pygim'} diff --git a/pygim-common/tests/unit_tests/test_pathset.py b/pygim-common/tests/unit_tests/test_pathset.py index a3a374b..f61ca4f 100644 --- a/pygim-common/tests/unit_tests/test_pathset.py +++ b/pygim-common/tests/unit_tests/test_pathset.py @@ -6,6 +6,7 @@ import pytest from pygim.fileio import PathSet +from pygim.fileio.pathset import flatten_paths # FIXME: TemporaryDirectory().cleanup() fails due to some weird # PermissionError in Windows environment in GitHub. From 290ca5a161a1df64eaedb780c700c6615a6480c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teppo=20Per=C3=A4?= Date: Mon, 3 Jul 2023 16:50:13 +0100 Subject: [PATCH 6/8] Adding tests --- .../example_02_overriding_methods.py | 2 +- .../_iterable/_iterable.py => _iterlib.py} | 8 ++ .../_iterable => _iterlib_fast}/flatten.cpp | 0 .../_iterable => _iterlib_fast}/flatten.h | 0 .../_iterable => _iterlib_fast}/iterutils.h | 13 ++- pygim-common/_pygim/_utils/__init__.py | 2 +- pygim-common/_pygim/_utils/_fileutils.py | 21 +++++ pygim-common/_pygim/_utils/_inspect.py | 2 +- .../_pygim/_utils/_iterable/__init__.py | 0 .../iterable_fast.cpp => common_fast.cpp} | 25 ++---- pygim-common/pygim/__init__.py | 6 ++ .../pygim/{check_mate.py => checklib.py} | 0 .../pygim/{exceptions.py => explib.py} | 0 pygim-common/pygim/fileio/pathset.py | 14 +-- pygim-common/pygim/gimmicks/entangled.pyi | 4 +- pygim-common/pygim/iterables/__init__.py | 26 ------ pygim-common/pygim/iterables/iterable.pyi | 7 -- pygim-common/pygim/iterlib.py | 15 ++++ .../{iterables/flatten.py => iterlib.pyi} | 31 +++---- pygim-common/setup.py | 27 +++--- pygim-common/tests/examples/test_examples.py | 2 +- pygim-common/tests/unit_tests/conftest.py | 13 +++ .../tests/unit_tests/test_fileutils.py | 35 ++++++++ pygim-common/tests/unit_tests/test_flatten.py | 4 +- .../{test_iterutils.py => test_iterlib.py} | 89 +++++++++---------- pygim-common/tests/unit_tests/test_pathset.py | 6 +- .../tests/unit_tests/test_sha256sum.py | 4 +- 27 files changed, 198 insertions(+), 158 deletions(-) rename pygim-common/_pygim/{_utils/_iterable/_iterable.py => _iterlib.py} (90%) rename pygim-common/_pygim/{_utils/_iterable => _iterlib_fast}/flatten.cpp (100%) rename pygim-common/_pygim/{_utils/_iterable => _iterlib_fast}/flatten.h (100%) rename pygim-common/_pygim/{_utils/_iterable => _iterlib_fast}/iterutils.h (85%) create mode 100644 pygim-common/_pygim/_utils/_fileutils.py delete mode 100644 pygim-common/_pygim/_utils/_iterable/__init__.py rename pygim-common/_pygim/{_utils/_iterable/iterable_fast.cpp => common_fast.cpp} (79%) rename pygim-common/pygim/{check_mate.py => checklib.py} (100%) rename pygim-common/pygim/{exceptions.py => explib.py} (100%) delete mode 100644 pygim-common/pygim/iterables/__init__.py delete mode 100644 pygim-common/pygim/iterables/iterable.pyi create mode 100644 pygim-common/pygim/iterlib.py rename pygim-common/pygim/{iterables/flatten.py => iterlib.pyi} (79%) create mode 100644 pygim-common/tests/unit_tests/test_fileutils.py rename pygim-common/tests/unit_tests/{test_iterutils.py => test_iterlib.py} (61%) diff --git a/docs/examples/entangled_classes/example_02_overriding_methods.py b/docs/examples/entangled_classes/example_02_overriding_methods.py index 42d6c91..97039c8 100644 --- a/docs/examples/entangled_classes/example_02_overriding_methods.py +++ b/docs/examples/entangled_classes/example_02_overriding_methods.py @@ -1,6 +1,6 @@ # type: ignore from pygim.gimmicks import EntangledClass -from pygim.exceptions import EntangledMethodError +from pygim.explib import EntangledMethodError # This creates inheritable class locally so that it is more evident, which class is # used. diff --git a/pygim-common/_pygim/_utils/_iterable/_iterable.py b/pygim-common/_pygim/_iterlib.py similarity index 90% rename from pygim-common/_pygim/_utils/_iterable/_iterable.py rename to pygim-common/_pygim/_iterlib.py index 14d3e08..b2991bb 100644 --- a/pygim-common/_pygim/_utils/_iterable/_iterable.py +++ b/pygim-common/_pygim/_iterlib.py @@ -55,6 +55,14 @@ def split(iterable, condition): return left, right +def tuplify(obj): + if isinstance(obj, dict): + return tuple((k, v) for k, v in obj.items()) + if is_container(obj): + return tuple(list(obj)) + return obj, + + def is_container(obj): if isinstance(obj, (str, bytes, type, Path)): diff --git a/pygim-common/_pygim/_utils/_iterable/flatten.cpp b/pygim-common/_pygim/_iterlib_fast/flatten.cpp similarity index 100% rename from pygim-common/_pygim/_utils/_iterable/flatten.cpp rename to pygim-common/_pygim/_iterlib_fast/flatten.cpp diff --git a/pygim-common/_pygim/_utils/_iterable/flatten.h b/pygim-common/_pygim/_iterlib_fast/flatten.h similarity index 100% rename from pygim-common/_pygim/_utils/_iterable/flatten.h rename to pygim-common/_pygim/_iterlib_fast/flatten.h diff --git a/pygim-common/_pygim/_utils/_iterable/iterutils.h b/pygim-common/_pygim/_iterlib_fast/iterutils.h similarity index 85% rename from pygim-common/_pygim/_utils/_iterable/iterutils.h rename to pygim-common/_pygim/_iterlib_fast/iterutils.h index 1f631e0..dd30266 100644 --- a/pygim-common/_pygim/_utils/_iterable/iterutils.h +++ b/pygim-common/_pygim/_iterlib_fast/iterutils.h @@ -19,7 +19,7 @@ inline bool is_container(py::handle obj) { */ // Base case function template for generic types template -inline std::enable_if_t && !std::is_same_v && !std::is_same_v, bool> +inline std::enable_if_t && !std::is_same_v && !std::is_same_v && !std::is_same_v, bool> is_container(const T& obj) { if (py::hasattr(obj, "__iter__")) { return true; @@ -34,6 +34,15 @@ is_container(const T& obj) { return false; } + +// Specialization for py::memoryview +template +inline std::enable_if_t, bool> +is_container(const T& obj) { + return false; +} + + // Specialization for py::memoryview template inline std::enable_if_t, bool> @@ -69,4 +78,4 @@ inline py::tuple tuplify(const py::str& arg) { inline py::tuple tuplify(const py::bytes& arg) { return py::make_tuple(arg); -}; \ No newline at end of file +}; diff --git a/pygim-common/_pygim/_utils/__init__.py b/pygim-common/_pygim/_utils/__init__.py index b1cba69..85f0385 100644 --- a/pygim-common/_pygim/_utils/__init__.py +++ b/pygim-common/_pygim/_utils/__init__.py @@ -3,7 +3,7 @@ Internal utilities package. ''' -from ._iterable._iterable import * +from .._iterlib import * from ._inspect import * diff --git a/pygim-common/_pygim/_utils/_fileutils.py b/pygim-common/_pygim/_utils/_fileutils.py new file mode 100644 index 0000000..41452a6 --- /dev/null +++ b/pygim-common/_pygim/_utils/_fileutils.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +''' +Internal package for file utils. +''' + +from pathlib import Path +from .._iterlib import flatten, is_container + + +def flatten_paths(paths, pattern): + if isinstance(paths, Path): + if paths.is_dir(): + yield paths + ps = list(paths.rglob(pattern)) + yield from ps + else: + yield paths + else: + assert is_container(paths), f'Expected `iterable` got `{type(paths).__name__}`' + for path in flatten(paths): + yield from flatten_paths(Path(path), pattern) \ No newline at end of file diff --git a/pygim-common/_pygim/_utils/_inspect.py b/pygim-common/_pygim/_utils/_inspect.py index d15925d..f31729e 100644 --- a/pygim-common/_pygim/_utils/_inspect.py +++ b/pygim-common/_pygim/_utils/_inspect.py @@ -6,7 +6,7 @@ import inspect import types -from ._iterable._iterable import flatten +from .._iterlib import flatten __all__ = ('TraitFunctions', 'has_instances', 'is_subset') diff --git a/pygim-common/_pygim/_utils/_iterable/__init__.py b/pygim-common/_pygim/_utils/_iterable/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pygim-common/_pygim/_utils/_iterable/iterable_fast.cpp b/pygim-common/_pygim/common_fast.cpp similarity index 79% rename from pygim-common/_pygim/_utils/_iterable/iterable_fast.cpp rename to pygim-common/_pygim/common_fast.cpp index 3080f0f..f9fc73b 100644 --- a/pygim-common/_pygim/_utils/_iterable/iterable_fast.cpp +++ b/pygim-common/_pygim/common_fast.cpp @@ -1,8 +1,8 @@ #include #include -#include "flatten.h" -#include "iterutils.h" +#include "_iterlib_fast/flatten.h" +#include "_iterlib_fast/iterutils.h" #include // std::string #define STRINGIFY(x) #x @@ -10,11 +10,12 @@ namespace py = pybind11; -PYBIND11_MODULE(fast_iterable, m) +PYBIND11_MODULE(common_fast, m) { - m.doc() = "Module of fast iterables."; // optional module docstring + m.doc() = "Python Gimmicks Common library."; // optional module docstring // is_container + m.def("is_container", (bool (*)(const py::type&)) &is_container, "A generic function that checks if a Python type is a container."); m.def("is_container", (bool (*)(const py::str&)) &is_container, "A function that checks if a Python str is a container."); m.def("is_container", (bool (*)(const py::bytes&)) &is_container, "A function that checks if a Python bytes is a container."); m.def("is_container", (bool (*)(const py::iterable&)) &is_container, "A function that converts an iterable to a tuple."); @@ -28,21 +29,7 @@ PYBIND11_MODULE(fast_iterable, m) m.def("tuplify", (py::tuple (*)(const py::dict&)) &tuplify, "A function that converts a dict to a tuple of key-value pairs."); m.def("tuplify", (py::tuple (*)(const py::iterable&)) &tuplify, "A function that converts an iterable to a tuple."); m.def("tuplify", (py::tuple (*)(const py::handle&)) &tuplify, "A function that converts a generic object to a single-element tuple."); - m.def("flatten_simple", [](py::iterable objects) { - py::iterator it = py::iter(objects); - py::list results; - for (; it != py::iterator::sentinel(); ++it) { - if (py::isinstance(*it)) { - py::iterator it2 = py::iter(*it); - for (; it2 != py::iterator::sentinel(); ++it2) { - results.append(*it2); - } - } else { - results.append(*it); - } - } - return results; - }); + py::class_(m, "flatten") .def(py::init([](py::object objs) { return new FlattenGenerator(_ensure_iter(objs)); })) .def("__iter__", [](const py::object &self) diff --git a/pygim-common/pygim/__init__.py b/pygim-common/pygim/__init__.py index ca01cdd..d7163c1 100644 --- a/pygim-common/pygim/__init__.py +++ b/pygim-common/pygim/__init__.py @@ -19,6 +19,12 @@ --------- dispatch.dispatch A function that supersedes `singledispatch(method)`. +iterarium.flatten(iterable) + Convert nested arrays into a single flat array. +iterarium.is_container(obj) + Check whether an object is iterable but not a string or bytes. +iterarium.split(iterable, condition) + Split an iterable into two iterables based on a condition function. Examples -------- diff --git a/pygim-common/pygim/check_mate.py b/pygim-common/pygim/checklib.py similarity index 100% rename from pygim-common/pygim/check_mate.py rename to pygim-common/pygim/checklib.py diff --git a/pygim-common/pygim/exceptions.py b/pygim-common/pygim/explib.py similarity index 100% rename from pygim-common/pygim/exceptions.py rename to pygim-common/pygim/explib.py diff --git a/pygim-common/pygim/fileio/pathset.py b/pygim-common/pygim/fileio/pathset.py index 5c0d754..903a26a 100644 --- a/pygim-common/pygim/fileio/pathset.py +++ b/pygim-common/pygim/fileio/pathset.py @@ -7,24 +7,12 @@ from pathlib import Path from dataclasses import dataclass -from _pygim._utils import is_container, flatten +from pygim.iterlib import flatten, is_container __all__ = ["PathSet"] -def flatten_paths(paths, pattern): - if isinstance(paths, Path): - if paths.is_dir(): - yield paths - for p in flatten_paths(paths.glob(pattern), pattern): - yield p - else: - yield paths - else: - assert is_container(paths), f'Expected `iterable` got `{type(paths).__name__}`' - for path in flatten(paths): - yield from flatten_paths(Path(path), pattern) class _FileSystemOps: diff --git a/pygim-common/pygim/gimmicks/entangled.pyi b/pygim-common/pygim/gimmicks/entangled.pyi index 939f2d9..df85af0 100644 --- a/pygim-common/pygim/gimmicks/entangled.pyi +++ b/pygim-common/pygim/gimmicks/entangled.pyi @@ -1,5 +1,5 @@ -import pygim.exceptions as ex -from .cached_type import CachedTypeMeta +import pygim.explib as ex +from _pygim._magic.cached_type import CachedTypeMeta def overrideable(func): ... def overrides(func): ... diff --git a/pygim-common/pygim/iterables/__init__.py b/pygim-common/pygim/iterables/__init__.py deleted file mode 100644 index 66052e0..0000000 --- a/pygim-common/pygim/iterables/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Iterable Utilities - -This module provides utilities for working with iterables. - -Functions ---------- -flatten(iterable) - Convert nested arrays into a single flat array. - -is_container(obj) - Check whether an object is iterable but not a string or bytes. - -split(iterable, condition) - Split an iterable into two iterables based on a condition function. - -""" - -from .flatten import * - -__all__ = [ - "flatten", - "is_container", - "split", -] \ No newline at end of file diff --git a/pygim-common/pygim/iterables/iterable.pyi b/pygim-common/pygim/iterables/iterable.pyi deleted file mode 100644 index 8171732..0000000 --- a/pygim-common/pygim/iterables/iterable.pyi +++ /dev/null @@ -1,7 +0,0 @@ -from typing import Iterable, Any, Callable, Tuple, Generator - -def split( - iterable: Iterable[Any], condition: Callable[[Any], bool] -) -> Tuple[Iterable[Any], Iterable[Any]]: ... -def is_container(obj: Any) -> bool: ... -def flatten(items: Iterable[Any]) -> Generator[Any, None, None]: ... diff --git a/pygim-common/pygim/iterlib.py b/pygim-common/pygim/iterlib.py new file mode 100644 index 0000000..b171cb4 --- /dev/null +++ b/pygim-common/pygim/iterlib.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +""" + +""" + +try: + import _pygim.common_fast as _mod +except ImportError: + from _pygim import _iterlib as _mod + + +__all__ = ["flatten", "is_container"] + +flatten = _mod.flatten +is_container = _mod.is_container diff --git a/pygim-common/pygim/iterables/flatten.py b/pygim-common/pygim/iterlib.pyi similarity index 79% rename from pygim-common/pygim/iterables/flatten.py rename to pygim-common/pygim/iterlib.pyi index 060b4a1..82b6097 100644 --- a/pygim-common/pygim/iterables/flatten.py +++ b/pygim-common/pygim/iterlib.pyi @@ -1,16 +1,10 @@ -# -*- coding: utf-8 -*- -""" +from typing import Iterable, Any, Callable, Tuple, Generator -""" - -try: - from _pygim._utils import iterable_fast as iterables -except ImportError: - from _pygim._utils import _iterable as iterables - - -is_container = iterables.is_container -is_container.__doc__ = """ +def split( + iterable: Iterable[Any], condition: Callable[[Any], bool] +) -> Tuple[Iterable[Any], Iterable[Any]]: ... +def is_container(obj: Any) -> bool: + """ Determine whether an object is a container. A container is considered an object that contains other objects. This @@ -35,11 +29,11 @@ >>> is_container(tuple()) True -""".split() + """ + ... - -flatten = iterables.flatten -flatten.__doc__ = """ +def flatten(items: Iterable[Any]) -> Generator[Any, None, None]: + """ Flatten a nested iterable into a single list. This function flattens nested iterables such as lists, tuples, and sets @@ -69,7 +63,4 @@ Flatten a list of strings: >>> list(flatten(["one", "two", ["three", "four"]])) ['one', 'two', 'three', 'four'] -""".strip() - - -__all__ = ["flatten", "is_container"] + """ \ No newline at end of file diff --git a/pygim-common/setup.py b/pygim-common/setup.py index 4969cc1..bac8782 100644 --- a/pygim-common/setup.py +++ b/pygim-common/setup.py @@ -4,34 +4,35 @@ # Available at setup time due to pyproject.toml from pybind11.setup_helpers import Pybind11Extension, build_ext -from setuptools import setup,find_packages +from setuptools import setup,find_namespace_packages import toml ROOT = Path(__file__).parent sys.path.append(str(ROOT / "pygim")) from __version__ import __version__ -from pygim.fileio.pathset import PathSet pyproject = toml.loads(Path('pyproject.toml').read_text()) -print("===================") -#print(PathSet(ROOT, "*.cpp").transform()) - -""" ext_modules = [ - Pybind11Extension("_pygim._pygim_common_fast", - #PathSet(ROOT, "*.cpp").transform(), + Pybind11Extension("_pygim.common_fast", + list(str(p) for p in Path(ROOT).rglob("*.cpp")), # Example: passing in the version to the compiled code define_macros = [('VERSION_INFO', __version__)], ), ] -""" -ext_modules = [] + +pygim = map(lambda v: ('pygim.' + v), find_namespace_packages('pygim')) +pygim_internal = map(lambda v: ('_pygim.' + v), find_namespace_packages('_pygim')) + + cfg = {**pyproject["project"]} -cfg['packages']=find_packages('pygim') -cfg['package_dir']={'': 'pygim'} +cfg['packages']= list(pygim) + list(pygim_internal) + ['pygim', '_pygim'] +cfg['package_dir']={ + '': '.', + } cfg['ext_modules']=ext_modules cfg['cmdclass']={"build_ext": build_ext} - +from pprint import pp +pp(cfg) setup(**cfg) \ No newline at end of file diff --git a/pygim-common/tests/examples/test_examples.py b/pygim-common/tests/examples/test_examples.py index 84fdbbe..f737d76 100644 --- a/pygim-common/tests/examples/test_examples.py +++ b/pygim-common/tests/examples/test_examples.py @@ -5,7 +5,7 @@ ROOT = Path(__file__).parents[3] -EXAMPLES = PathSet(ROOT / 'docs/examples').files(suffix=".py" ) +EXAMPLES = PathSet(ROOT / 'docs/examples').files(suffix=".py") assert EXAMPLES EXAMPLES = list(reversed(sorted(EXAMPLES))) diff --git a/pygim-common/tests/unit_tests/conftest.py b/pygim-common/tests/unit_tests/conftest.py index ec85c1d..0307b13 100644 --- a/pygim-common/tests/unit_tests/conftest.py +++ b/pygim-common/tests/unit_tests/conftest.py @@ -14,6 +14,19 @@ def temp_dir(): tdir.cleanup() + +@pytest.fixture() +def filled_temp_dir(temp_dir): + _FILES = ['readme.txt', 'readme.rst', 'AUTHORS.rst'] + test_files = [temp_dir / f for f in _FILES] + assert not any(f.is_file() for f in test_files) + + [f.touch() for f in test_files] + assert all(f.is_file() for f in test_files) + + yield temp_dir + + import importlib.util import sys diff --git a/pygim-common/tests/unit_tests/test_fileutils.py b/pygim-common/tests/unit_tests/test_fileutils.py new file mode 100644 index 0000000..8f3196a --- /dev/null +++ b/pygim-common/tests/unit_tests/test_fileutils.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +import pytest + +from _pygim._utils._fileutils import flatten_paths + + +def test_flatten_paths_on_flat_dir(filled_temp_dir): + files = list(flatten_paths(filled_temp_dir, "*")) + files = [d.name for d in files[1:]] + + assert files == ['AUTHORS.rst', 'readme.txt', 'readme.rst'] + + +def test_flatten_paths_on_deep_dir(filled_temp_dir): + t_dir_1 = filled_temp_dir / "test1" + t_dir_2 = filled_temp_dir / "test2" + + t_dir_1.mkdir() + t_dir_2.mkdir() + + (t_dir_1 / "test.txt").touch() + (t_dir_2 / "test.txt").touch() + + files = list(flatten_paths(filled_temp_dir, "*")) + files = [d.name for d in files[1:]] + + assert files == ['AUTHORS.rst', 'test1', 'readme.txt', 'readme.rst', + 'test2', 'test.txt', 'test.txt'] + + +if __name__ == '__main__': + from pygim.testing import run_tests + + # With coverage run, tests fail in meta.__call__ due to reload. + run_tests(__file__, flatten_paths.__module__, coverage=False) diff --git a/pygim-common/tests/unit_tests/test_flatten.py b/pygim-common/tests/unit_tests/test_flatten.py index 88b7000..001d2cd 100644 --- a/pygim-common/tests/unit_tests/test_flatten.py +++ b/pygim-common/tests/unit_tests/test_flatten.py @@ -5,8 +5,8 @@ from functools import singledispatch import pytest -from _pygim._utils._iterable._iterable import flatten -from _pygim._utils._iterable.fast_iterable import flatten as flatten_fast +from _pygim._iterlib import flatten +from _pygim.common_fast import flatten as flatten_fast diff --git a/pygim-common/tests/unit_tests/test_iterutils.py b/pygim-common/tests/unit_tests/test_iterlib.py similarity index 61% rename from pygim-common/tests/unit_tests/test_iterutils.py rename to pygim-common/tests/unit_tests/test_iterlib.py index 8b9c73d..e61a2d5 100644 --- a/pygim-common/tests/unit_tests/test_iterutils.py +++ b/pygim-common/tests/unit_tests/test_iterlib.py @@ -1,57 +1,21 @@ #type: ignore import pytest -from pygim.utils.fast_iterable import tuplify, is_container +from _pygim.common_fast import ( + tuplify as tuplify_fast, + is_container as is_container_fast) +from _pygim._iterlib import is_container, tuplify class CustomIterableObject: def __iter__(self): return [] + class CustomNonIterableObject: pass ''' - - -IS_CONTAINER_TESTS = [ - (str, False), - (bytes, False), - (bytearray, False), - (memoryview, False), - (range, False), - (list, False), - (tuple, False), - (int, False), - (float, False), - (complex, False), - (set, False), - (frozenset, False), - (dict, False), - - # Various instances - ('text', False), - (b'text', False), - (bytearray([1,2,3]), True), - (memoryview(bytearray([1,2,3])), True), - (range(100), True), - ([1,2,3], True), - ((1,2,3), True), - (42, False), - (42.42, False), - (complex(42, 42), False), - (set([1, 2, 3]), True), - (frozenset([1, 2, 3]), True), - (dict(one=1), True), -] - -@pytest.mark.parametrize("input,expected_result", IS_CONTAINER_TESTS) -def test_is_container(input, expected_result): - actual_result = is_container(input) - if not equals(actual_result, expected_result): - assert False, f"{type(input)} is not {expected_result}" - - SPLIT_TESTS = [ ([1, 2, 3, 4], lambda v: v % 2, ([1, 3], [2, 4])), ([1, 2, 3, 4], lambda v: v <= 2, ([1, 2], [3, 4])), @@ -68,12 +32,27 @@ def test_split(input, func, expected_result): ''' @pytest.mark.parametrize("input,expected_result", [ + (str, False), # str-type is not a container + (bytes, False), # bytes-type is not a container + (bytearray, False), # bytearray-type is not a container + (memoryview, False), # memoryview-type is not a container + (range, False), # range-type is not a container + (list, False), # list-type is not a container + (tuple, False), # tuple-type is not a container + (int, False), # int-type is not a container + (float, False), # float-type is not a container + (complex, False), # complex-type is not a container + (set, False), # set-type is not a container + (frozenset, False), # frozenset-type is not a container + (dict, False), # dict-type is not a container + ((1, 2, 3), True), # Tuple is a container ([1, 2, 3], True), # List is a container (set([1, 2, 3]), True), # Set is a container (range(1, 4), True), # Range is a container ("123", False), # String is not considered a container (b"123", False), # Byte string is not considered a container + (bytearray(122), True), # Byte array is not considered a container (123, False), # Integer is not a container (123.456, False), # Float is not a container (None, False), # None is not a container @@ -90,10 +69,16 @@ def test_split(input, func, expected_result): ]) def test_is_container_with_various_types(input, expected_result): + actual_result_fast = is_container_fast(input) actual_result = is_container(input) - if actual_result != expected_result: - assert False, f"Results differ:\n ACTUAL: {actual_result}\nEXPECTED: {expected_result} " + if not (actual_result == actual_result_fast == expected_result): + assert False, "\n".join([ + f"Results differ for `{input}`:", + f" ACTUAL: {actual_result}", + f"ACTUAL (fast): {actual_result_fast}", + f" EXPECTED: {expected_result} ", + ]) @pytest.mark.parametrize("input,expected_result", [ @@ -109,14 +94,24 @@ def test_is_container_with_various_types(input, expected_result): (True, (True,)), # Boolean remains as single-element tuple ({"a": 1}, (("a", 1),)), # Dictionary remains as single-element tuple (complex(1, 2), (complex(1, 2),)), # Complex number remains as single-element tuple - (iter([1, 2, 3]), (1, 2, 3)), # Iterable gets converted to tuple - ((i for i in range(1, 4)), (1, 2, 3)), # Generator gets converted to tuple ]) def test_tuplify_with_various_types(input, expected_result): actual_result = tuplify(input) + actual_result_fast = tuplify_fast(input) + + if not (actual_result == actual_result_fast == expected_result): + assert False, "\n".join([ + f"Results differ for `{input}`:", + f" ACTUAL: {actual_result}", + f"ACTUAL (fast): {actual_result_fast}", + f" EXPECTED: {expected_result} ", + ]) + + +def test_tuplify_with_generators(): + assert tuplify(iter([1, 2, 3])) == tuplify_fast(iter([1, 2, 3])) == (1,2,3) + assert tuplify(i for i in range(1, 4)) == tuplify_fast(i for i in range(1, 4)) == (1,2,3) - if actual_result != expected_result: - assert False, f"Results differ:\n ACTUAL: {actual_result}\nEXPECTED: {expected_result} " if __name__ == "__main__": diff --git a/pygim-common/tests/unit_tests/test_pathset.py b/pygim-common/tests/unit_tests/test_pathset.py index f61ca4f..6e55174 100644 --- a/pygim-common/tests/unit_tests/test_pathset.py +++ b/pygim-common/tests/unit_tests/test_pathset.py @@ -129,6 +129,10 @@ def test_dropped_files_based_on_filter(filled_temp_dir): assert new_paths == PathSet([]) +def test_flatten_paths(temp_dir): + pass + + if __name__ == "__main__": - from pygim.utils.testing import run_tests + from pygim.testing import run_tests run_tests(__file__, PathSet.__module__, coverage=False) \ No newline at end of file diff --git a/pygim-common/tests/unit_tests/test_sha256sum.py b/pygim-common/tests/unit_tests/test_sha256sum.py index bced3ec..a1ab9eb 100644 --- a/pygim-common/tests/unit_tests/test_sha256sum.py +++ b/pygim-common/tests/unit_tests/test_sha256sum.py @@ -8,8 +8,8 @@ from unittest.mock import patch from pygim.security import sha256sum, sha256sum_file -from pygim.exceptions import ShaSumTargetNotFoundError -from pygim.iterables import flatten +from pygim.explib import ShaSumTargetNotFoundError +from pygim.iterlib import flatten TESTS = [ From 061146b7f74565a7ed2e7e11b64ffbda39274a14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teppo=20Per=C3=A4?= Date: Fri, 7 Jul 2023 12:59:01 +0100 Subject: [PATCH 7/8] snapshot --- pygim-common/_pygim/_iterlib_fast/flatten.cpp | 12 +- pygim-common/_pygim/_iterlib_fast/flatten.h | 1 + pygim-common/_pygim/_iterlib_fast/iterutils.h | 30 ++--- pygim-common/_pygim/_utils/_fileutils.py | 24 ++-- pygim-common/_pygim/common_fast.cpp | 39 ++++--- pygim-common/pygim/fileio/pathset.py | 8 +- .../tests/unit_tests/test_fileutils.py | 4 +- pygim-common/tests/unit_tests/test_flatten.py | 105 ++++++++++-------- pygim-common/tests/unit_tests/test_iterlib.py | 3 +- pygim-common/tests/unit_tests/test_pathset.py | 6 +- 10 files changed, 128 insertions(+), 104 deletions(-) diff --git a/pygim-common/_pygim/_iterlib_fast/flatten.cpp b/pygim-common/_pygim/_iterlib_fast/flatten.cpp index c1fe54d..4ea61c8 100644 --- a/pygim-common/_pygim/_iterlib_fast/flatten.cpp +++ b/pygim-common/_pygim/_iterlib_fast/flatten.cpp @@ -8,6 +8,12 @@ FlattenGenerator::FlattenGenerator() {} +FlattenGenerator::~FlattenGenerator() { + std::cout << "destruct()"; + std::cout << py::str(iterators.back()) << std::endl; + iterators.clear(); +} + FlattenGenerator::FlattenGenerator(py::iterator items) { iterators.push_back(items); } @@ -20,13 +26,13 @@ bool FlattenGenerator::isComplete() { } py::handle FlattenGenerator::next() { - // std::cout << "-> next()" << std::endl; + std::cout << "-> next()" << std::endl; py::iterator &it = iterators.back(); auto last = *it; ++it; if (is_container(last)) { - // std::cout << "it's a list! " << std::endl; + std::cout << "it's a list!" << py::str(last) << std::endl; iterators.push_back(py::iter(last)); if (!isComplete()) { return next(); @@ -34,7 +40,7 @@ py::handle FlattenGenerator::next() { throw py::stop_iteration(); } } else { - // std::cout << "<- next() " << std::endl; + std::cout << "<- next() " << std::endl; return last; } } diff --git a/pygim-common/_pygim/_iterlib_fast/flatten.h b/pygim-common/_pygim/_iterlib_fast/flatten.h index 0666e7a..a9bd6f1 100644 --- a/pygim-common/_pygim/_iterlib_fast/flatten.h +++ b/pygim-common/_pygim/_iterlib_fast/flatten.h @@ -25,6 +25,7 @@ class FlattenGenerator { public: FlattenGenerator(); FlattenGenerator(py::iterator items); + ~FlattenGenerator(); bool isComplete(); diff --git a/pygim-common/_pygim/_iterlib_fast/iterutils.h b/pygim-common/_pygim/_iterlib_fast/iterutils.h index dd30266..43f82d5 100644 --- a/pygim-common/_pygim/_iterlib_fast/iterutils.h +++ b/pygim-common/_pygim/_iterlib_fast/iterutils.h @@ -4,23 +4,19 @@ #include // std::string namespace py = pybind11; -/* -inline bool is_container(py::handle obj) { - if (py::isinstance(obj) || py::isinstance(obj)) { - return false; - } - if (py::hasattr(obj, "__iter__")) { - return true; - } - - return py::isinstance(obj); -}; -*/ // Base case function template for generic types template -inline std::enable_if_t && !std::is_same_v && !std::is_same_v && !std::is_same_v, bool> +inline std::enable_if_t< + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v, + bool> is_container(const T& obj) { + if (py::isinstance(obj) | py::isinstance(obj)) { + return false; + } if (py::hasattr(obj, "__iter__")) { return true; } @@ -29,8 +25,12 @@ is_container(const T& obj) { // Specialization for py::str and py::bytes template -inline std::enable_if_t || std::is_same_v, bool> +inline std::enable_if_t< + std::is_same_v || + std::is_same_v, + bool> is_container(const T& obj) { + std::cout << "<-> is_container(str, bytes)" << std::endl; return false; } @@ -39,6 +39,7 @@ is_container(const T& obj) { template inline std::enable_if_t, bool> is_container(const T& obj) { + std::cout << "<-> is_container(type)" << std::endl; return false; } @@ -47,6 +48,7 @@ is_container(const T& obj) { template inline std::enable_if_t, bool> is_container(const T& obj) { + std::cout << "<-> is_container(memoryview)" << std::endl; return true; } diff --git a/pygim-common/_pygim/_utils/_fileutils.py b/pygim-common/_pygim/_utils/_fileutils.py index 41452a6..311053a 100644 --- a/pygim-common/_pygim/_utils/_fileutils.py +++ b/pygim-common/_pygim/_utils/_fileutils.py @@ -4,18 +4,20 @@ ''' from pathlib import Path -from .._iterlib import flatten, is_container +try: + from ..common_fast import flatten +except ImportError: + from .._iterlib import flatten -def flatten_paths(paths, pattern): - if isinstance(paths, Path): - if paths.is_dir(): - yield paths - ps = list(paths.rglob(pattern)) + +def flatten_paths(*paths, pattern): + for path in flatten(paths): + path = Path(path) + + if path.is_dir(): + yield path + ps = list(path.rglob(pattern)) yield from ps else: - yield paths - else: - assert is_container(paths), f'Expected `iterable` got `{type(paths).__name__}`' - for path in flatten(paths): - yield from flatten_paths(Path(path), pattern) \ No newline at end of file + yield path diff --git a/pygim-common/_pygim/common_fast.cpp b/pygim-common/_pygim/common_fast.cpp index f9fc73b..dc6d24c 100644 --- a/pygim-common/_pygim/common_fast.cpp +++ b/pygim-common/_pygim/common_fast.cpp @@ -15,12 +15,12 @@ PYBIND11_MODULE(common_fast, m) m.doc() = "Python Gimmicks Common library."; // optional module docstring // is_container - m.def("is_container", (bool (*)(const py::type&)) &is_container, "A generic function that checks if a Python type is a container."); m.def("is_container", (bool (*)(const py::str&)) &is_container, "A function that checks if a Python str is a container."); m.def("is_container", (bool (*)(const py::bytes&)) &is_container, "A function that checks if a Python bytes is a container."); + m.def("is_container", (bool (*)(const py::type&)) &is_container, "A generic function that checks if a Python type is a container."); m.def("is_container", (bool (*)(const py::iterable&)) &is_container, "A function that converts an iterable to a tuple."); m.def("is_container", (bool (*)(const py::memoryview&)) &is_container, "A function that checks if a Python memoryview is a container."); - m.def("is_container", (bool (*)(const py::handle&)) &is_container, "A generic function that checks if a Python object is a container."); + //m.def("is_container", (bool (*)(const py::handle&)) &is_container, "A generic function that checks if a Python object is a container."); // tuplify m.def("tuplify", (py::tuple (*)(const py::bytes&)) &tuplify, "A function that converts a bytes object to a single-element tuple"); @@ -31,25 +31,28 @@ PYBIND11_MODULE(common_fast, m) m.def("tuplify", (py::tuple (*)(const py::handle&)) &tuplify, "A function that converts a generic object to a single-element tuple."); py::class_(m, "flatten") - .def(py::init([](py::object objs) { return new FlattenGenerator(_ensure_iter(objs)); })) + .def(py::init([](py::object objs) { + std::cout << "-> init" << std::endl; + return new FlattenGenerator(_ensure_iter(objs)); + })) .def("__iter__", [](const py::object &self) { return self; }) .def("__next__", - [](FlattenGenerator *self) - { - // std::cout << "-> __next__" << std::endl; - if (self->isComplete()) - { - // std::cout << "<- __next__ (complete)" << std::endl; - throw py::stop_iteration(); - } - - //py::gil_scoped_release release; - auto result = self->next(); - - // std::cout << "<- next" << std::endl; - return result; - }); + [](FlattenGenerator *self) + { + std::cout << "-> __next__" << std::endl; + if (self->isComplete()) + { + self->iterators.clear(); + throw py::stop_iteration(); + } + + //py::gil_scoped_release release; + auto result = self->next(); + + std::cout << "<- __next__" << py::str(result) << std::endl; + return result; + }); #ifdef VERSION_INFO m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); diff --git a/pygim-common/pygim/fileio/pathset.py b/pygim-common/pygim/fileio/pathset.py index 903a26a..da147b9 100644 --- a/pygim-common/pygim/fileio/pathset.py +++ b/pygim-common/pygim/fileio/pathset.py @@ -7,14 +7,12 @@ from pathlib import Path from dataclasses import dataclass -from pygim.iterlib import flatten, is_container +from pygim.iterlib import is_container +from _pygim._utils._fileutils import flatten_paths __all__ = ["PathSet"] - - - class _FileSystemOps: """Functionality to manipulate the filesystem.""" @@ -92,7 +90,7 @@ def __post_init__(self): # We just handled the optional part, let's make mypy happy. assert paths is not None - super().__setattr__("_paths", frozenset(flatten_paths([paths], self._paths))) + super().__setattr__("_paths", frozenset(flatten_paths(paths, pattern=self._pattern))) assert all([isinstance(p, Path) for p in self._paths]) assert isinstance(self._paths, frozenset) diff --git a/pygim-common/tests/unit_tests/test_fileutils.py b/pygim-common/tests/unit_tests/test_fileutils.py index 8f3196a..67f69ad 100644 --- a/pygim-common/tests/unit_tests/test_fileutils.py +++ b/pygim-common/tests/unit_tests/test_fileutils.py @@ -5,7 +5,7 @@ def test_flatten_paths_on_flat_dir(filled_temp_dir): - files = list(flatten_paths(filled_temp_dir, "*")) + files = list(flatten_paths(filled_temp_dir, pattern="*")) files = [d.name for d in files[1:]] assert files == ['AUTHORS.rst', 'readme.txt', 'readme.rst'] @@ -21,7 +21,7 @@ def test_flatten_paths_on_deep_dir(filled_temp_dir): (t_dir_1 / "test.txt").touch() (t_dir_2 / "test.txt").touch() - files = list(flatten_paths(filled_temp_dir, "*")) + files = list(flatten_paths(filled_temp_dir, pattern="*")) files = [d.name for d in files[1:]] assert files == ['AUTHORS.rst', 'test1', 'readme.txt', 'readme.rst', diff --git a/pygim-common/tests/unit_tests/test_flatten.py b/pygim-common/tests/unit_tests/test_flatten.py index 001d2cd..cbe0169 100644 --- a/pygim-common/tests/unit_tests/test_flatten.py +++ b/pygim-common/tests/unit_tests/test_flatten.py @@ -2,6 +2,7 @@ #type: ignore """ Test utility functions. """ +from pathlib import Path from functools import singledispatch import pytest @@ -17,51 +18,52 @@ def equals(left: object, right): t = tuple FLATTEN_TESTS = [ - ([], []), - ([1, 2, 3], [1, 2, 3]), - ([[1], [2], [3]], [1, 2, 3]), - ([[[[1]]], [[[2]]], [[[3]]]], [1, 2, 3]), - (set([1, 2, 3]), [1, 2, 3]), - ([set([1]), [[[2]]], 3], [1, 2, 3]), - (tuple([1, 2, 3]), [1, 2, 3]), - ([1, [], 2], [1, 2]), - ((1,2,(3,4)), [1,2,3,4]), - ((1,2,set([3,4])), [1,2,3,4]), + #([], []), + #([1, 2, 3], [1, 2, 3]), + #([[1], [2], [3]], [1, 2, 3]), + #([[[[1]]], [[[2]]], [[[3]]]], [1, 2, 3]), + #(set([1, 2, 3]), [1, 2, 3]), + #([set([1]), [[[2]]], 3], [1, 2, 3]), + #(tuple([1, 2, 3]), [1, 2, 3]), + #([1, [], 2], [1, 2]), + #((1,2,(3,4)), [1,2,3,4]), + #((1,2,set([3,4])), [1,2,3,4]), (range(10000), list(range(10000))), - ([[[[[[[[[[[]]]]]]]]]]], []), - ([[[[[[[[[[[1]]]]]]]]]]], [1]), - (set([t([t([t([])])])]), []), - (set([t([t([t([1])])])]), [1]), - - (["one", 2, 3], ["one", 2, 3]), - ([["one"], [2], [3]], ["one", 2, 3]), - ([[[["one"]]], [[[2]]], [[[3]]]], ["one", 2, 3]), - (set(["one", 2, 3]), ["one", 2, 3]), - ([set(["one"]), [[[2]]], 3], ["one", 2, 3]), - - (["one", "two", "three"], ["one", "two", "three"]), - ([["one"], ["two"], ["three"]], ["one", "two", "three"]), - ([[[["one"]]], [[["two"]]], [[["three"]]]], ["one", "two", "three"]), - (set(["one", "two", "three"]), ["one", "two", "three"]), - ([set(["one"]), [[["two"]]], "three"], ["one", "two", "three"]), - - ([str, None, False], [str, None, False]), - ([[str], [None], [False]], [str, None, False]), - ([[[[str]]], [[[None]]], [[[False]]]], [str, None, False]), - (set([str, None, False]), [str, None, False]), - ([set([str]), [[[None]]], False], [str, None, False]), - (iter([1, 2, 3]), [1, 2, 3]), - ((i for i in range(1, 4)), [1, 2, 3]), - ({"a": 1}, [("a", 1)]), - - (["keep as is"], ["keep as is"]), - ([b"keep as is"], [b"keep as is"]), - (memoryview(b"keep as is"), [b"keep as is"]), - (1, [1]), - (123.456, [123.456]), - (complex(1, 2), [complex(1, 2)]), - (None, [None]), - (True, [True]), + #([[[[[[[[[[[]]]]]]]]]]], []), + #([[[[[[[[[[[1]]]]]]]]]]], [1]), + #(set([t([t([t([])])])]), []), + #(set([t([t([t([1])])])]), [1]), + + #(["one", 2, 3], ["one", 2, 3]), + # ([["one"], [2], [3]], ["one", 2, 3]), + # ([[[["one"]]], [[[2]]], [[[3]]]], ["one", 2, 3]), + # (set(["one", 2, 3]), ["one", 2, 3]), + # ([set(["one"]), [[[2]]], 3], ["one", 2, 3]), + + # (["one", "two", "three"], ["one", "two", "three"]), + # ([["one"], ["two"], ["three"]], ["one", "two", "three"]), + # ([[[["one"]]], [[["two"]]], [[["three"]]]], ["one", "two", "three"]), + # (set(["one", "two", "three"]), ["one", "two", "three"]), + # ([set(["one"]), [[["two"]]], "three"], ["one", "two", "three"]), + + # ([str, None, False], [str, None, False]), + # ([[str], [None], [False]], [str, None, False]), + # ([[[[str]]], [[[None]]], [[[False]]]], [str, None, False]), + # (set([str, None, False]), [str, None, False]), + # ([set([str]), [[[None]]], False], [str, None, False]), + # (iter([1, 2, 3]), [1, 2, 3]), + # ((i for i in range(1, 4)), [1, 2, 3]), + # ({"a": 1}, [("a", 1)]), + + # (["keep as is"], ["keep as is"]), + # ([b"keep as is"], [b"keep as is"]), + # (memoryview(b"keep as is"), [b"keep as is"]), + # (1, [1]), + # (123.456, [123.456]), + # (complex(1, 2), [complex(1, 2)]), + # (None, [None]), + # (True, [True]), + # ([Path.home(), Path.home(), Path.home()], [Path.home(), Path.home(), Path.home()]), ] @pytest.mark.parametrize("input,expected_result", FLATTEN_TESTS) @@ -80,7 +82,20 @@ def test_flatten(input, expected_result): assert False, f"Results differ:\n ACTUAL: {list(flatten(input))}\nEXPECTED: {expected_result} " +@pytest.mark.parametrize("input,expected_result", FLATTEN_TESTS) +def test_flatten_fast(input, expected_result): + try: + expected_result = input.__class__(expected_result) + except (TypeError, ValueError): + expected_result = list(expected_result) + actual_result = flatten_fast(input) + try: + actual_result = input.__class__(actual_result) + except (TypeError, ValueError): + actual_result = list(actual_result) + if not equals(actual_result, expected_result): + assert False, f"Results differ:\n ACTUAL: {list(flatten_fast(input))}\nEXPECTED: {expected_result} " if __name__ == "__main__": - pytest.main([__file__]) \ No newline at end of file + pytest.main([__file__, "--capture=no"]) \ No newline at end of file diff --git a/pygim-common/tests/unit_tests/test_iterlib.py b/pygim-common/tests/unit_tests/test_iterlib.py index e61a2d5..1722ee5 100644 --- a/pygim-common/tests/unit_tests/test_iterlib.py +++ b/pygim-common/tests/unit_tests/test_iterlib.py @@ -3,7 +3,8 @@ from _pygim.common_fast import ( tuplify as tuplify_fast, - is_container as is_container_fast) + is_container as is_container_fast, + ) from _pygim._iterlib import is_container, tuplify diff --git a/pygim-common/tests/unit_tests/test_pathset.py b/pygim-common/tests/unit_tests/test_pathset.py index 6e55174..b150eb2 100644 --- a/pygim-common/tests/unit_tests/test_pathset.py +++ b/pygim-common/tests/unit_tests/test_pathset.py @@ -6,7 +6,7 @@ import pytest from pygim.fileio import PathSet -from pygim.fileio.pathset import flatten_paths + # FIXME: TemporaryDirectory().cleanup() fails due to some weird # PermissionError in Windows environment in GitHub. @@ -129,10 +129,6 @@ def test_dropped_files_based_on_filter(filled_temp_dir): assert new_paths == PathSet([]) -def test_flatten_paths(temp_dir): - pass - - if __name__ == "__main__": from pygim.testing import run_tests run_tests(__file__, PathSet.__module__, coverage=False) \ No newline at end of file From 6b6fb7118c2dd4e9ddbd1b2f94d502a1af11f71a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teppo=20Per=C3=A4?= Date: Sun, 9 Jul 2023 16:00:09 +0100 Subject: [PATCH 8/8] improvements to flatten --- pygim-common/_pygim/_iterlib_fast/flatten.cpp | 12 ++- pygim-common/_pygim/_iterlib_fast/flatten.h | 2 + pygim-common/_pygim/_iterlib_fast/iterutils.h | 40 +++++++--- pygim-common/_pygim/common_fast.cpp | 20 +++-- pygim-common/pygim/__init__.py | 6 +- pygim-common/pygim/iterlib.py | 19 +++-- pygim-common/setup.py | 1 + pygim-common/tests/unit_tests/test_flatten.py | 79 ++++++++++--------- pygim-common/tests/unit_tests/test_iterlib.py | 17 +++- 9 files changed, 127 insertions(+), 69 deletions(-) diff --git a/pygim-common/_pygim/_iterlib_fast/flatten.cpp b/pygim-common/_pygim/_iterlib_fast/flatten.cpp index 4ea61c8..d7f88ec 100644 --- a/pygim-common/_pygim/_iterlib_fast/flatten.cpp +++ b/pygim-common/_pygim/_iterlib_fast/flatten.cpp @@ -9,15 +9,19 @@ FlattenGenerator::FlattenGenerator() {} FlattenGenerator::~FlattenGenerator() { - std::cout << "destruct()"; - std::cout << py::str(iterators.back()) << std::endl; - iterators.clear(); + //iterators.clear(); } FlattenGenerator::FlattenGenerator(py::iterator items) { iterators.push_back(items); } + +FlattenGenerator::FlattenGenerator(const FlattenGenerator& other) : + iterators(other.iterators) { +} + + bool FlattenGenerator::isComplete() { while (!iterators.empty() && iterators.back() == py::iterator::sentinel()) { iterators.pop_back(); @@ -40,7 +44,7 @@ py::handle FlattenGenerator::next() { throw py::stop_iteration(); } } else { - std::cout << "<- next() " << std::endl; + std::cout << "<- next() " << py::str(last) << std::endl; return last; } } diff --git a/pygim-common/_pygim/_iterlib_fast/flatten.h b/pygim-common/_pygim/_iterlib_fast/flatten.h index a9bd6f1..b266eb5 100644 --- a/pygim-common/_pygim/_iterlib_fast/flatten.h +++ b/pygim-common/_pygim/_iterlib_fast/flatten.h @@ -9,6 +9,7 @@ namespace py = pybind11; inline py::iterator _ensure_iter(py::handle obj) { + std::cout << "ensure_iter" << std::endl; if (py::isinstance(obj)) { return obj.cast(); } @@ -25,6 +26,7 @@ class FlattenGenerator { public: FlattenGenerator(); FlattenGenerator(py::iterator items); + FlattenGenerator(const FlattenGenerator&); ~FlattenGenerator(); bool isComplete(); diff --git a/pygim-common/_pygim/_iterlib_fast/iterutils.h b/pygim-common/_pygim/_iterlib_fast/iterutils.h index 43f82d5..9cd19fb 100644 --- a/pygim-common/_pygim/_iterlib_fast/iterutils.h +++ b/pygim-common/_pygim/_iterlib_fast/iterutils.h @@ -9,46 +9,62 @@ namespace py = pybind11; template inline std::enable_if_t< !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !std::is_same_v, bool> is_container(const T& obj) { + std::cout << "is_container" << std::endl; if (py::isinstance(obj) | py::isinstance(obj)) { + std::cout << "isinstance || isinstance" << std::endl; return false; } if (py::hasattr(obj, "__iter__")) { + std::cout << "hasattr __iter__" << std::endl; return true; } + std::cout << "return false" << std::endl; return false; } // Specialization for py::str and py::bytes template inline std::enable_if_t< + std::is_same_v || + std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v, bool> is_container(const T& obj) { - std::cout << "<-> is_container(str, bytes)" << std::endl; return false; } // Specialization for py::memoryview template -inline std::enable_if_t, bool> -is_container(const T& obj) { - std::cout << "<-> is_container(type)" << std::endl; - return false; -} - - -// Specialization for py::memoryview -template -inline std::enable_if_t, bool> +inline std::enable_if_t< + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v, + bool> is_container(const T& obj) { - std::cout << "<-> is_container(memoryview)" << std::endl; return true; } diff --git a/pygim-common/_pygim/common_fast.cpp b/pygim-common/_pygim/common_fast.cpp index dc6d24c..3626242 100644 --- a/pygim-common/_pygim/common_fast.cpp +++ b/pygim-common/_pygim/common_fast.cpp @@ -10,17 +10,26 @@ namespace py = pybind11; + PYBIND11_MODULE(common_fast, m) { m.doc() = "Python Gimmicks Common library."; // optional module docstring // is_container + m.def("is_container", (bool (*)(const py::int_&)) &is_container, "A function that checks if a Python str is a container."); + m.def("is_container", (bool (*)(const py::float_&)) &is_container, "A function that checks if a Python str is a container."); m.def("is_container", (bool (*)(const py::str&)) &is_container, "A function that checks if a Python str is a container."); + m.def("is_container", (bool (*)(const py::bytearray&)) &is_container, "A function that checks if a Python bytes is a container."); m.def("is_container", (bool (*)(const py::bytes&)) &is_container, "A function that checks if a Python bytes is a container."); + m.def("is_container", (bool (*)(const py::tuple&)) &is_container, "A function that checks if a Python bytes is a container."); + m.def("is_container", (bool (*)(const py::list&)) &is_container, "A function that checks if a Python bytes is a container."); + m.def("is_container", (bool (*)(const py::set&)) &is_container, "A function that checks if a Python bytes is a container."); + m.def("is_container", (bool (*)(const py::dict&)) &is_container, "A function that checks if a Python bytes is a container."); m.def("is_container", (bool (*)(const py::type&)) &is_container, "A generic function that checks if a Python type is a container."); - m.def("is_container", (bool (*)(const py::iterable&)) &is_container, "A function that converts an iterable to a tuple."); m.def("is_container", (bool (*)(const py::memoryview&)) &is_container, "A function that checks if a Python memoryview is a container."); - //m.def("is_container", (bool (*)(const py::handle&)) &is_container, "A generic function that checks if a Python object is a container."); + m.def("is_container", (bool (*)(const py::iterator&)) &is_container, "A function that checks if a Python memoryview is a container."); + m.def("is_container", (bool (*)(const py::iterable&)) &is_container, "A function that checks if a Python memoryview is a container."); + m.def("is_container", (bool (*)(const py::handle&)) &is_container, "A generic function that checks if a Python object is a container."); // tuplify m.def("tuplify", (py::tuple (*)(const py::bytes&)) &tuplify, "A function that converts a bytes object to a single-element tuple"); @@ -32,9 +41,9 @@ PYBIND11_MODULE(common_fast, m) py::class_(m, "flatten") .def(py::init([](py::object objs) { - std::cout << "-> init" << std::endl; + //std::cout << "-> init" << std::endl; return new FlattenGenerator(_ensure_iter(objs)); - })) + }), py::keep_alive<1, 0>()) .def("__iter__", [](const py::object &self) { return self; }) .def("__next__", @@ -43,11 +52,10 @@ PYBIND11_MODULE(common_fast, m) std::cout << "-> __next__" << std::endl; if (self->isComplete()) { - self->iterators.clear(); + std::cout << "stop" << std::endl; throw py::stop_iteration(); } - //py::gil_scoped_release release; auto result = self->next(); std::cout << "<- __next__" << py::str(result) << std::endl; diff --git a/pygim-common/pygim/__init__.py b/pygim-common/pygim/__init__.py index d7163c1..9f74962 100644 --- a/pygim-common/pygim/__init__.py +++ b/pygim-common/pygim/__init__.py @@ -19,11 +19,11 @@ --------- dispatch.dispatch A function that supersedes `singledispatch(method)`. -iterarium.flatten(iterable) +iterlib.flatten(iterable) Convert nested arrays into a single flat array. -iterarium.is_container(obj) +iterlib.is_container(obj) Check whether an object is iterable but not a string or bytes. -iterarium.split(iterable, condition) +iterlib.split(iterable, condition) Split an iterable into two iterables based on a condition function. Examples diff --git a/pygim-common/pygim/iterlib.py b/pygim-common/pygim/iterlib.py index b171cb4..f3bdc0d 100644 --- a/pygim-common/pygim/iterlib.py +++ b/pygim-common/pygim/iterlib.py @@ -1,15 +1,24 @@ # -*- coding: utf-8 -*- """ +Functions +--------- +flatten(iterable) + Convert nested arrays into a single flat array. +is_container(obj) + Check whether an object is iterable but not a string or bytes. +split(iterable, condition) + Split an iterable into two iterables based on a condition function. """ try: - import _pygim.common_fast as _mod + import _pygim.common_fast as _lib except ImportError: - from _pygim import _iterlib as _mod + from _pygim import _iterlib as _lib +from _pygim._iterlib import split -__all__ = ["flatten", "is_container"] +__all__ = ["flatten", "is_container", "split"] -flatten = _mod.flatten -is_container = _mod.is_container +flatten = _lib.flatten +is_container = _lib.is_container diff --git a/pygim-common/setup.py b/pygim-common/setup.py index bac8782..733da90 100644 --- a/pygim-common/setup.py +++ b/pygim-common/setup.py @@ -19,6 +19,7 @@ list(str(p) for p in Path(ROOT).rglob("*.cpp")), # Example: passing in the version to the compiled code define_macros = [('VERSION_INFO', __version__)], + extra_compile_args=["-g"], ), ] diff --git a/pygim-common/tests/unit_tests/test_flatten.py b/pygim-common/tests/unit_tests/test_flatten.py index cbe0169..eaf1889 100644 --- a/pygim-common/tests/unit_tests/test_flatten.py +++ b/pygim-common/tests/unit_tests/test_flatten.py @@ -18,23 +18,26 @@ def equals(left: object, right): t = tuple FLATTEN_TESTS = [ - #([], []), - #([1, 2, 3], [1, 2, 3]), - #([[1], [2], [3]], [1, 2, 3]), - #([[[[1]]], [[[2]]], [[[3]]]], [1, 2, 3]), - #(set([1, 2, 3]), [1, 2, 3]), - #([set([1]), [[[2]]], 3], [1, 2, 3]), - #(tuple([1, 2, 3]), [1, 2, 3]), - #([1, [], 2], [1, 2]), - #((1,2,(3,4)), [1,2,3,4]), - #((1,2,set([3,4])), [1,2,3,4]), - (range(10000), list(range(10000))), - #([[[[[[[[[[[]]]]]]]]]]], []), - #([[[[[[[[[[[1]]]]]]]]]]], [1]), - #(set([t([t([t([])])])]), []), - #(set([t([t([t([1])])])]), [1]), - - #(["one", 2, 3], ["one", 2, 3]), + # ([], []), + # ([1, 2, 3], [1, 2, 3]), + # (list(range(10000)), list(range(10000))), + # ([[1], [2], [3]], [1, 2, 3]), + # ([[[[1]]], [[[2]]], [[[3]]]], [1, 2, 3]), + # (set([1, 2, 3]), [1, 2, 3]), + # ([set([1]), [[[2]]], 3], [1, 2, 3]), + # (tuple([1, 2, 3]), [1, 2, 3]), + # ([1, [], 2], [1, 2]), + # ((1,2,(3,4)), [1,2,3,4]), + # ((1,2,set([3,4])), [1,2,3,4]), + # (range(0), list(range(0))), + # (range(1), list(range(1))), + # #(range(10000), list(range(10000))), + # ([[[[[[[[[[[]]]]]]]]]]], []), + # ([[[[[[[[[[[1]]]]]]]]]]], [1]), + # (set([t([t([t([])])])]), []), + # (set([t([t([t([1])])])]), [1]), + + # (["one", 2, 3], ["one", 2, 3]), # ([["one"], [2], [3]], ["one", 2, 3]), # ([[[["one"]]], [[[2]]], [[[3]]]], ["one", 2, 3]), # (set(["one", 2, 3]), ["one", 2, 3]), @@ -46,24 +49,24 @@ def equals(left: object, right): # (set(["one", "two", "three"]), ["one", "two", "three"]), # ([set(["one"]), [[["two"]]], "three"], ["one", "two", "three"]), - # ([str, None, False], [str, None, False]), - # ([[str], [None], [False]], [str, None, False]), - # ([[[[str]]], [[[None]]], [[[False]]]], [str, None, False]), - # (set([str, None, False]), [str, None, False]), - # ([set([str]), [[[None]]], False], [str, None, False]), - # (iter([1, 2, 3]), [1, 2, 3]), - # ((i for i in range(1, 4)), [1, 2, 3]), - # ({"a": 1}, [("a", 1)]), - - # (["keep as is"], ["keep as is"]), - # ([b"keep as is"], [b"keep as is"]), - # (memoryview(b"keep as is"), [b"keep as is"]), - # (1, [1]), - # (123.456, [123.456]), - # (complex(1, 2), [complex(1, 2)]), - # (None, [None]), - # (True, [True]), - # ([Path.home(), Path.home(), Path.home()], [Path.home(), Path.home(), Path.home()]), + ([str, None, False], [str, None, False]), + ([[str], [None], [False]], [str, None, False]), + ([[[[str]]], [[[None]]], [[[False]]]], [str, None, False]), + (set([str, None, False]), [str, None, False]), + ([set([str]), [[[None]]], False], [str, None, False]), + (iter([1, 2, 3]), [1, 2, 3]), + ((i for i in range(1, 4)), [1, 2, 3]), + ({"a": 1}, ["a"]), + + (["keep as is"], ["keep as is"]), + ([b"keep as is"], [b"keep as is"]), + (memoryview(b"keep as is"), [b"keep as is"]), + (1, [1]), + (123.456, [123.456]), + (complex(1, 2), [complex(1, 2)]), + (None, [None]), + (True, [True]), + ([Path.home(), Path.home(), Path.home()], [Path.home(), Path.home(), Path.home()]), ] @pytest.mark.parametrize("input,expected_result", FLATTEN_TESTS) @@ -95,7 +98,11 @@ def test_flatten_fast(input, expected_result): except (TypeError, ValueError): actual_result = list(actual_result) if not equals(actual_result, expected_result): - assert False, f"Results differ:\n ACTUAL: {list(flatten_fast(input))}\nEXPECTED: {expected_result} " + assert False, "\n".join([ + f"Results differ for input {input}:", + f"(fast) ACTUAL: {list(flatten_fast(input))}", + f" ACTUAL: {list(flatten(input))}", + f" EXPECTED: {expected_result}"]) if __name__ == "__main__": pytest.main([__file__, "--capture=no"]) \ No newline at end of file diff --git a/pygim-common/tests/unit_tests/test_iterlib.py b/pygim-common/tests/unit_tests/test_iterlib.py index 1722ee5..2278941 100644 --- a/pygim-common/tests/unit_tests/test_iterlib.py +++ b/pygim-common/tests/unit_tests/test_iterlib.py @@ -6,6 +6,8 @@ is_container as is_container_fast, ) from _pygim._iterlib import is_container, tuplify +import numpy as np +import pandas as pd class CustomIterableObject: @@ -46,6 +48,11 @@ def test_split(input, func, expected_result): (set, False), # set-type is not a container (frozenset, False), # frozenset-type is not a container (dict, False), # dict-type is not a container + (np.float64, False), # numpy-type is not a container + (np.float32, False), # numpy-type is not a container + (np.float16, False), # numpy-type is not a container + (np.nan, False), # numpy-type is not a container + (pd.NA, False), # pandas-type is not a container ((1, 2, 3), True), # Tuple is a container ([1, 2, 3], True), # List is a container @@ -67,6 +74,10 @@ def test_split(input, func, expected_result): (CustomNonIterableObject(), False), # Custom non-iterable object is not a container ([[1, 2, 3]], True), # List of lists is a container ({'a': set([1, 2, 3])}, True), # Dictionary of sets is a container + + (np.ndarray([]), True), # ndarray-type is a container + (pd.Series([], dtype=int), True), # Series-type is a container + (pd.DataFrame([]), True), # DataFrame-type is a container ]) def test_is_container_with_various_types(input, expected_result): @@ -76,8 +87,8 @@ def test_is_container_with_various_types(input, expected_result): if not (actual_result == actual_result_fast == expected_result): assert False, "\n".join([ f"Results differ for `{input}`:", - f" ACTUAL: {actual_result}", - f"ACTUAL (fast): {actual_result_fast}", + f" ACTUAL: {is_container(input)}", + f"ACTUAL (fast): {is_container_fast(input)}", f" EXPECTED: {expected_result} ", ]) @@ -117,4 +128,4 @@ def test_tuplify_with_generators(): if __name__ == "__main__": import pytest - pytest.main([__file__]) + pytest.main([__file__, "--capture=no"])