Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/fast flatten #21

Open
wants to merge 9 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# type: ignore
from pygim.gimmicks import EntangledClass
from pygim.exceptions import EntangledMethodError
from pygim.explib import EntangledMethodError

# This creates an inheritable class locally so that it is more evident which
# class is used.
Expand Down
82 changes: 82 additions & 0 deletions pygim-common/_pygim/_iterlib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
"""
This module contains internal utility functions.
"""

from pathlib import Path

__all__ = ("split", "flatten", "is_container")


def split(iterable, condition):
    """
    Partition the items of an iterable into two lists using a predicate.

    Parameters
    ----------
    iterable : `iterable`
        Any iterable whose items should be partitioned. It is consumed once,
        front to back.
    condition : `callable`
        A function taking a single item and returning a truthy or falsy
        value that decides which list the item goes into.

    Returns
    -------
    `tuple` [`list`, `list`]
        The first list holds the items for which ``condition`` was truthy,
        the second list holds all remaining items. The relative order of
        the items is preserved in both lists.

    Examples
    --------
    >>> numbers = [1, 2, 3, 4, 5]
    >>> def is_even(n):
    ...     return n % 2 == 0
    ...
    >>> even_numbers, odd_numbers = split(numbers, is_even)
    >>> even_numbers
    [2, 4]
    >>> odd_numbers
    [1, 3, 5]
    """
    accepted, rejected = [], []

    for element in iterable:
        target = accepted if condition(element) else rejected
        target.append(element)

    return accepted, rejected


def tuplify(obj):
    """
    Convert an arbitrary object into a tuple.

    Parameters
    ----------
    obj : `object`
        The object to convert.

    Returns
    -------
    `tuple`
        For a `dict`, a tuple of ``(key, value)`` pairs. For any other object
        that `is_container` accepts, a tuple of its items. Otherwise a
        one-element tuple holding ``obj`` itself.
    """
    if isinstance(obj, dict):
        pairs = [(key, value) for key, value in obj.items()]
        return tuple(pairs)

    return tuple(obj) if is_container(obj) else (obj,)


def is_container(obj):
    """
    Tell whether an object should be treated as a collection of items.

    Parameters
    ----------
    obj : `object`
        The object to inspect.

    Returns
    -------
    `bool`
        ``False`` for `str`, `bytes`, type objects and `pathlib.Path`
        instances. ``True`` for any other object that has an ``__iter__``
        attribute, and for `memoryview` objects.
    """
    # These are handled as single values even when they can be iterated.
    atomic_types = (str, bytes, type, Path)
    if isinstance(obj, atomic_types):
        return False

    return hasattr(obj, "__iter__") or isinstance(obj, memoryview)


def flatten(iterable):
    """
    Lazily yield the leaf items of an arbitrarily nested structure.

    Parameters
    ----------
    iterable : `object`
        The object to flatten. Anything that `is_container` rejects is
        yielded as-is.

    Yields
    ------
    `object`
        Every non-container item, depth first, in iteration order.
    """
    if not is_container(iterable):
        yield iterable
        return

    for element in iterable:
        yield from flatten(element)
50 changes: 50 additions & 0 deletions pygim-common/_pygim/_iterlib_fast/flatten.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include <pybind11/pybind11.h>
#include <pybind11/pytypes.h>
#include <pybind11/stl.h>
#include <iostream> // std::string

#include "flatten.h"
#include "iterutils.h"

/// Creates a generator with an empty iterator stack, so isComplete() reports
/// true straight away.
FlattenGenerator::FlattenGenerator() = default;

/// Nothing to release by hand: the vector of iterators cleans up after itself.
FlattenGenerator::~FlattenGenerator() = default;

FlattenGenerator::FlattenGenerator(py::iterator items) {
iterators.push_back(items);
}


/// Copies the iterator stack of `other` member-wise.
// NOTE(review): the copied py::iterator entries presumably refer to the same
// underlying Python iterator objects, so a copy would not advance
// independently of the original -- confirm before relying on copies.
FlattenGenerator::FlattenGenerator(const FlattenGenerator& other) = default;


bool FlattenGenerator::isComplete() {
while (!iterators.empty() && iterators.back() == py::iterator::sentinel()) {
iterators.pop_back();
}
return iterators.empty();
}

/// Returns the next leaf (non-container) item, descending into nested
/// containers as they are encountered.
///
/// @return A non-owning handle to the next leaf item.
/// @throws py::stop_iteration when every iterator on the stack is exhausted,
///         including when the stack was empty to begin with.
py::handle FlattenGenerator::next() {
    // Loop instead of recursing so that long runs of nested or empty
    // containers cannot exhaust the native call stack.
    while (!isComplete()) {
        py::iterator &top = iterators.back();

        // Take an owning reference before advancing: `++top` replaces the value
        // cached inside the iterator, which can drop the last reference to the
        // item (for example when it was produced by a generator).
        py::object item = py::reinterpret_borrow<py::object>(*top);
        ++top;

        if (!is_container(item)) {
            // NOTE(review): the declared return type is a non-owning handle, so
            // the item is only guaranteed to stay alive while something else
            // (typically the source container) references it. Returning
            // py::object would close this gap but requires changing the
            // declaration in the header as well.
            return py::handle(item.ptr());
        }

        // Descend into the nested container. `top` must not be used after this
        // line because push_back may reallocate the vector.
        iterators.push_back(py::iter(item));
    }

    throw py::stop_iteration();
}
40 changes: 40 additions & 0 deletions pygim-common/_pygim/_iterlib_fast/flatten.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef FLATTEN_GENERATOR_H
#define FLATTEN_GENERATOR_H

#include <vector>
#include <pybind11/pybind11.h>

#include "iterutils.h"

namespace py = pybind11;

/// Produces a Python iterator for `obj`.
///
/// - An object that already is an iterator is returned as such.
/// - An object that is_container() rejects is wrapped in a tuple via tuplify()
///   and that tuple is iterated.
/// - Any other object is iterated directly.
///
/// @param obj The Python object to iterate over.
/// @return An iterator as described above.
inline py::iterator _ensure_iter(py::handle obj) {
    if (py::isinstance<py::iterator>(obj)) {
        return obj.cast<py::iterator>();
    }

    if (!is_container(obj)) {
        return py::iter(tuplify(obj));
    }

    return py::iter(obj);
}


/// Walks nested Python iterables depth first and hands out their
/// non-container items one at a time.
///
/// The state is a stack of Python iterators: the last entry belongs to the
/// innermost container currently being traversed.
class FlattenGenerator {
public:
    /// Creates a generator with nothing to iterate.
    FlattenGenerator();
    /// Copies the iterator stack of `other`.
    FlattenGenerator(const FlattenGenerator& other);
    /// Starts flattening from the given iterator.
    FlattenGenerator(py::iterator items);
    ~FlattenGenerator();

    /// Returns the next non-container item; throws py::stop_iteration when a
    /// nested container turns out to be the end of the input.
    // NOTE(review): callers appear to be expected to check isComplete() before
    // calling next() -- confirm against the binding code.
    py::handle next();

    /// Removes exhausted iterators from the stack and returns true when none
    /// remain. Not const because it mutates the stack.
    bool isComplete();

private:
    // Stack of iterators; back() is the innermost one.
    std::vector<py::iterator> iterators;
};

#endif // FLATTEN_GENERATOR_H
99 changes: 99 additions & 0 deletions pygim-common/_pygim/_iterlib_fast/iterutils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#pragma once

#include <pybind11/pybind11.h>
#include <iostream> // std::string

namespace py = pybind11;

// Generic overload: chosen for every type that is not one of the concrete
// pybind11 wrappers handled by the two constant overloads (for example
// py::object and py::handle). The static type says nothing about the Python
// object here, so the object is inspected at run time.
//
// Returns false for str, bytes and type objects, true for anything else that
// has an __iter__ attribute, and true for memoryview objects. This mirrors the
// pure-Python is_container() fallback.
template <typename T>
inline std::enable_if_t<
    !std::is_same_v<T, py::str> &&
    !std::is_same_v<T, py::int_> &&
    !std::is_same_v<T, py::float_> &&
    !std::is_same_v<T, py::bool_> &&
    !std::is_same_v<T, py::bytes> &&
    !std::is_same_v<T, py::memoryview> &&
    !std::is_same_v<T, py::bytearray> &&
    !std::is_same_v<T, py::tuple> &&
    !std::is_same_v<T, py::dict> &&
    !std::is_same_v<T, py::list> &&
    !std::is_same_v<T, py::set> &&
    !std::is_same_v<T, py::iterator> &&
    !std::is_same_v<T, py::iterable> &&
    !std::is_same_v<T, py::type>,
    bool>
is_container(const T& obj) {
    // Text, byte strings and type objects can all expose __iter__ (a class
    // such as `list` carries the attribute without being iterable itself),
    // but they are treated as single values.
    if (py::isinstance<py::str>(obj) || py::isinstance<py::bytes>(obj) ||
        py::isinstance<py::type>(obj)) {
        return false;
    }

    if (py::hasattr(obj, "__iter__")) {
        return true;
    }

    // memoryview is accepted explicitly, as in the pure-Python version.
    return py::isinstance<py::memoryview>(obj);
}

// Overload for wrappers whose static type already identifies a value that is
// never treated as a container: str, bytes, bool, int, float and type.
// The argument is not inspected.
template <typename T>
inline std::enable_if_t<
    std::is_same_v<T, py::str> ||
    std::is_same_v<T, py::bytes> ||
    std::is_same_v<T, py::bool_> ||
    std::is_same_v<T, py::int_> ||
    std::is_same_v<T, py::float_> ||
    std::is_same_v<T, py::type>,
    bool>
is_container(const T& /*obj*/) {
    return false;
}


// Overload for wrappers whose static type already identifies a container:
// tuple, list, set, dict, bytearray, memoryview, iterator and iterable.
// The argument is not inspected.
template <typename T>
inline std::enable_if_t<
    std::is_same_v<T, py::tuple> ||
    std::is_same_v<T, py::list> ||
    std::is_same_v<T, py::set> ||
    std::is_same_v<T, py::dict> ||
    std::is_same_v<T, py::bytearray> ||
    std::is_same_v<T, py::memoryview> ||
    std::is_same_v<T, py::iterator> ||
    std::is_same_v<T, py::iterable>,
    bool>
is_container(const T& /*obj*/) {
    return true;
}



// tuplify(): converts a Python object into a py::tuple. The overload is
// selected from the static type of the argument.
// NOTE(review): for wrappers without a dedicated overload (e.g. py::list) the
// py::handle overload looks like the best match, which wraps the object in a
// 1-tuple instead of copying its items -- confirm this is intended.

// A tuple is returned as it is.
inline py::tuple tuplify(const py::tuple& arg) {
    return arg;
}

// A dict becomes a tuple of (key, value) tuples.
inline py::tuple tuplify(const py::dict& arg) {
    py::list pairs;
    for (auto entry : arg) {
        pairs.append(py::make_tuple(entry.first, entry.second));
    }
    return py::tuple(pairs);
}

// An iterable is converted to a tuple of its items.
inline py::tuple tuplify(const py::iterable& arg) {
    return py::tuple(arg);
}

// A plain handle is wrapped in a one-element tuple.
inline py::tuple tuplify(const py::handle& arg) {
    return py::make_tuple(arg);
}

// A str is kept whole and wrapped in a one-element tuple.
inline py::tuple tuplify(const py::str& arg) {
    return py::make_tuple(arg);
}

// A bytes object is kept whole and wrapped in a one-element tuple.
inline py::tuple tuplify(const py::bytes& arg) {
    return py::make_tuple(arg);
}
2 changes: 1 addition & 1 deletion pygim-common/_pygim/_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Internal utilities package.
'''

from ._iterable import *
from .._iterlib import *
from ._inspect import *


Expand Down
23 changes: 23 additions & 0 deletions pygim-common/_pygim/_utils/_fileutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
'''
Internal package for file utils.
'''

from pathlib import Path

try:
from ..common_fast import flatten
except ImportError:
from .._iterlib import flatten


def flatten_paths(*paths, pattern):
    """
    Yield the given paths and, for directories, everything matching a pattern.

    Parameters
    ----------
    *paths
        Paths or arbitrarily nested collections of paths; they are flattened
        with `flatten` and each item is converted to `pathlib.Path`.
    pattern : `str`
        Keyword-only glob pattern handed to `pathlib.Path.rglob` for every
        directory.

    Yields
    ------
    `pathlib.Path`
        Each input path itself, immediately followed, when it is a directory,
        by the results of ``rglob(pattern)`` below it.
    """
    for entry in flatten(paths):
        entry = Path(entry)
        # Decide before yielding, so the check sees the same state as before.
        is_directory = entry.is_dir()

        yield entry

        if is_directory:
            # Collect the matches up front, then hand them out one by one.
            matches = list(entry.rglob(pattern))
            yield from matches
2 changes: 1 addition & 1 deletion pygim-common/_pygim/_utils/_inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import inspect
import types

from ._iterable import flatten
from .._iterlib import flatten

__all__ = ('TraitFunctions', 'has_instances', 'is_subset')

Expand Down
Loading