diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml new file mode 100644 index 000000000..b67bef836 --- /dev/null +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -0,0 +1,171 @@ +name: Python Pybind cibuildwheel + +on: + push: + branches: [master, release-*] + pull_request: + branches: [master] + workflow_dispatch: + +jobs: + build_wheels: + name: pybind11-cibuildwheel ${{ matrix.os }}/${{ matrix.arch }}/${{ matrix.flavor }}/${{ matrix.target }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-22.04] + # separate archs, so they use individual caches + arch: ["x86_64", "arm64"] + flavor: ["cpython", "pypy"] + # separate musl and many on linux, for mac we just skip one of those + target: ["many", "musl"] + exclude: + - os: ubuntu-22.04 + target: musl + flavor: pypy + steps: + - uses: actions/checkout@v4 + - name: Set up QEMU + if: ${{ (runner.os == 'Linux') && (matrix.arch == 'arm64') }} + uses: docker/setup-qemu-action@v3 + with: + platforms: all + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.11 + with: + key: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}-${{ matrix.flavor }}-python + + - name: Sets env for x86_64 + run: | + echo "CIBW_ARCHS_LINUX=auto64" >> $GITHUB_ENV + echo "CIBW_ARCHS_MACOS=x86_64" >> $GITHUB_ENV + if: matrix.arch == 'x86_64' + + - name: Sets env for arm64 + run: | + echo "CIBW_ARCHS_LINUX=aarch64" >> $GITHUB_ENV + echo "CIBW_ARCHS_MACOS=arm64" >> $GITHUB_ENV + if: matrix.arch == 'arm64' + + - name: Skip manylinux for musllinux target + if: ${{ (runner.os == 'Linux') && (matrix.target == 'musl') }} + run: | + echo "CIBW_SKIP=*manylinux*" >> $GITHUB_ENV + + - name: Skip musllinux for manylinux target + if: ${{ (runner.os == 'Linux') && (matrix.target == 'many') }} + run: | + echo "CIBW_SKIP=*musllinux*" >> $GITHUB_ENV + + - name: Skip pypy for cpython + if: ${{ matrix.flavor == 'cpython' }} + run: | + echo "CIBW_SKIP=${{ env.CIBW_SKIP }} pp*" 
>> $GITHUB_ENV + + - name: Skip cpython for pypy + if: ${{ matrix.flavor == 'pypy' }} + run: | + echo "CIBW_SKIP=${{ env.CIBW_SKIP }} cp*" >> $GITHUB_ENV + + - name: install mac dependencies + if: ${{ runner.os == 'macOS' }} + # 2nd command: workaround https://github.com/actions/setup-python/issues/577 + run: | + brew update && \ + brew list -1 | grep python | while read formula; do brew unlink $formula; brew link --overwrite $formula; done && \ + brew install ccache + + - name: install mac dependencies X86_64 + if: ${{ (runner.os == 'macOS') && (matrix.arch == 'x86_64') }} + run: | + brew update && \ + brew install zlib snappy boost + + - name: install mac dependencies arm64 + if: ${{ (runner.os == 'macOS') && (matrix.arch == 'arm64') }} + run: | + set -e + echo "MACOSX_DEPLOYMENT_TARGET=12.3.0" >> $GITHUB_ENV + echo "_CMAKE_PREFIX_PATH=${{ github.workspace }}/arm64-homebrew" >> $GITHUB_ENV + echo "CIBW_REPAIR_WHEEL_COMMAND_MACOS=DYLD_LIBRARY_PATH=${{ github.workspace }}/arm64-homebrew delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}" >> $GITHUB_ENV + mkdir arm64-homebrew && curl -L https://github.com/Homebrew/brew/tarball/master | tar xz --strip 1 -C arm64-homebrew + PACKAGES=( icu4c xz lz4 zstd zlib snappy boost ) + for PACKAGE in "${PACKAGES[@]}" + do + response=$(arm64-homebrew/bin/brew fetch --force --bottle-tag=arm64_sonoma $PACKAGE | grep Downloaded ) + download_path=$(echo $response | xargs -n 1 | tail -1) + arm64-homebrew/bin/brew reinstall -vd $download_path + done + arm64-homebrew/bin/brew config + ls /Users/runner/work/keyvi/keyvi/arm64-homebrew + + - name: Build python wheels for ${{ matrix.os }} on ${{ matrix.arch }} + uses: pypa/cibuildwheel@v2.21.3 + env: + # Skip CPython 3.6 and CPython 3.7 + CIBW_SKIP: ${{ env.CIBW_SKIP }} cp36-* cp37-* pp37-* + + # skip testing all python versions on linux arm, only test 3.12 + # skip tests on pypy, currently fails for indexer tests + CIBW_TEST_SKIP: "*p{38,39,310,311}-m*linux_aarch64 
pp*" + + # (many)linux custom docker images + CIBW_MANYLINUX_X86_64_IMAGE: "keyvidev/manylinux-builder-x86_64" + CIBW_MANYLINUX_AARCH64_IMAGE: "keyvidev/manylinux-builder-aarch64" + CIBW_MUSLLINUX_X86_64_IMAGE: "keyvidev/musllinux-builder-x86_64" + CIBW_MUSLLINUX_AARCH64_IMAGE: "keyvidev/musllinux-builder-aarch64" + + # ccache using path + CIBW_ENVIRONMENT_MACOS: PATH=/usr/local/opt/ccache/libexec:$PATH + CIBW_ENVIRONMENT_LINUX: PATH=/usr/local/bin:/usr/lib/ccache:$PATH CCACHE_DIR=/host${{ github.workspace }}/.ccache CCACHE_CONFIGPATH=/host/home/runner/.config/ccache/ccache.conf + + # for debugging set this to 1,2 or 3 + # CIBW_BUILD_VERBOSITY: 2 + + - uses: actions/upload-artifact@v4 + with: + name: artifact-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.flavor }}-${{ matrix.target }} + path: ./wheelhouse/*.whl + + build_sdist: + name: sdist + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: install Linux deps + run: | + sudo apt-get update && \ + sudo apt-get install -y libsnappy-dev libzzip-dev zlib1g-dev libboost-all-dev ccache ninja-build pipx + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.11 + with: + key: ${{ runner.os }}-sdist-python + + - name: Build SDist + run: | + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + pipx run build --sdist + python -m pip install dist/*.tar.gz -v && \ + python -m pip install pytest && \ + python -m pytest python-pybind/tests && \ + python -m pip uninstall -y keyvi_scikit_core + + - uses: actions/upload-artifact@v4 + with: + name: artifact-sdist + path: dist/*.tar.gz + + upload_all: + needs: [build_wheels, build_sdist] + runs-on: ubuntu-latest + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + steps: + - uses: actions/download-artifact@v4 + with: + pattern: artifact-* + merge-multiple: true + path: dist + + - uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.pypi_password }} diff --git a/.gitignore b/.gitignore index 
0c020fb18..e13d3cdcd 100644 --- a/.gitignore +++ b/.gitignore @@ -31,7 +31,7 @@ *.orig # cmake build dir -build/* +/*build* */cmake-build-debug/* build_dir_debug/ cmake-build-debug/ @@ -45,3 +45,11 @@ cmake-build-debug/ # vim swap files *.swp + +# python +*.egg-info + +# pybind build folder +python*/*build* +python*/dist +python*/.cache/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 0608bbd33..8d7959711 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.21) project(keyvi) #### Build Type @@ -6,14 +6,25 @@ if (CMAKE_BUILD_TYPE) string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) endif() +#### Options + +option(KEYVI_C_BINDINGS "Keyvi: Build C binding" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_PYTHON_BINDINGS "Keyvi: Build Python module" OFF) +option(KEYVI_TESTS "Keyvi: Build unit tests" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_BINARIES "Keyvi: Build binaries" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_CLANG_TIDY "Keyvi: Build with clang tidy" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_DOCS "Keyvi: Build docs" ${PROJECT_IS_TOP_LEVEL}) + #### Linting -find_program(CLANGTIDY clang-tidy) -if(CLANGTIDY) - message ("-- Found clang-tidy") - set(CMAKE_CXX_CLANG_TIDY clang-tidy; --extra-arg-before=-std=c++17) -else() - message ("-- clang-tidy not found") -endif() +if(KEYVI_CLANG_TIDY) + find_program(CLANGTIDY clang-tidy) + if(CLANGTIDY) + message ("-- Found clang-tidy") + set(CMAKE_CXX_CLANG_TIDY clang-tidy; --extra-arg-before=-std=c++17) + else() + message ("-- clang-tidy not found") + endif() +endif(KEYVI_CLANG_TIDY) #### Cmake modules set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/") @@ -141,67 +152,73 @@ string(REPLACE " " ";" _KEYVI_COMPILE_DEFINITIONS_LIST "${_KEYVI_COMPILE_DEFINIT #### Targets #### -# keyvicompiler -add_executable(keyvicompiler keyvi/bin/keyvicompiler/keyvicompiler.cpp) -target_link_libraries(keyvicompiler ${Boost_LIBRARIES} 
${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) -target_compile_options(keyvicompiler PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyvicompiler PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(keyvicompiler PRIVATE "$") +if(KEYVI_BINARIES) + # keyvicompiler + add_executable(keyvicompiler keyvi/bin/keyvicompiler/keyvicompiler.cpp) + target_link_libraries(keyvicompiler ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) + target_compile_options(keyvicompiler PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyvicompiler PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyvicompiler PRIVATE "$") -install (TARGETS keyvicompiler DESTINATION bin COMPONENT applications OPTIONAL) + install (TARGETS keyvicompiler DESTINATION bin COMPONENT applications OPTIONAL) -# keyviinspector -add_executable(keyviinspector keyvi/bin/keyviinspector/keyviinspector.cpp) -target_link_libraries(keyviinspector ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) -target_compile_options(keyviinspector PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyviinspector PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(keyviinspector PRIVATE "$") + # keyviinspector + add_executable(keyviinspector keyvi/bin/keyviinspector/keyviinspector.cpp) + target_link_libraries(keyviinspector ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) + target_compile_options(keyviinspector PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyviinspector PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyviinspector PRIVATE "$") -install (TARGETS keyviinspector DESTINATION bin COMPONENT applications OPTIONAL) + install (TARGETS keyviinspector DESTINATION bin COMPONENT applications OPTIONAL) -# keyvimerger -add_executable(keyvimerger keyvi/bin/keyvimerger/keyvimerger.cpp) -target_link_libraries(keyvimerger 
${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) -target_compile_options(keyvimerger PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyvimerger PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(keyvimerger PRIVATE "$") + # keyvimerger + add_executable(keyvimerger keyvi/bin/keyvimerger/keyvimerger.cpp) + target_link_libraries(keyvimerger ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) + target_compile_options(keyvimerger PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyvimerger PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyvimerger PRIVATE "$") -install (TARGETS keyvimerger DESTINATION bin COMPONENT applications) + install (TARGETS keyvimerger DESTINATION bin COMPONENT applications) +endif(KEYVI_BINARIES) # keyvi_c -add_library(keyvi_c SHARED keyvi/bin/keyvi_c/c_api.cpp) -target_link_libraries(keyvi_c ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) -target_compile_options(keyvi_c PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyvi_c PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(keyvi_c PRIVATE "$") +if(KEYVI_C_BINDINGS) + add_library(keyvi_c SHARED keyvi/bin/keyvi_c/c_api.cpp) + target_link_libraries(keyvi_c ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) + target_compile_options(keyvi_c PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyvi_c PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyvi_c PRIVATE "$") +endif(KEYVI_C_BINDINGS) # unit tests -FILE(GLOB_RECURSE UNIT_TEST_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} keyvi/tests/keyvi/*.cpp) -add_executable(unit_test_all ${UNIT_TEST_SOURCES}) -target_link_libraries(unit_test_all ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) -target_compile_options(unit_test_all PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) 
-target_compile_definitions(unit_test_all PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(unit_test_all PRIVATE "$") -add_dependencies(unit_test_all keyvimerger) - -if (WIN32) - message(STATUS "zlib: ${ZLIB_LIBRARY_RELEASE}") - # copies the dlls required to run to the build folder - foreach(LIB ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY_RELEASE} ${Boost_FILESYSTEM_LIBRARY_RELEASE} ${ZLIB_LIBRARY_RELEASE}) - get_filename_component(UTF_BASE_NAME ${LIB} NAME_WE) - get_filename_component(UTF_PATH ${LIB} PATH) - if(EXISTS "${UTF_PATH}/${UTF_BASE_NAME}.dll") - add_custom_command(TARGET unit_test_all POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} - ) - # zlib might be stored in a different folder - elseif(EXISTS "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll") - add_custom_command(TARGET unit_test_all POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} - ) - endif() - endforeach() -endif (WIN32) +if(KEYVI_TESTS) + FILE(GLOB_RECURSE UNIT_TEST_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} keyvi/tests/keyvi/*.cpp) + add_executable(unit_test_all ${UNIT_TEST_SOURCES}) + target_link_libraries(unit_test_all ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) + target_compile_options(unit_test_all PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(unit_test_all PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(unit_test_all PRIVATE "$") + add_dependencies(unit_test_all keyvimerger) + + if (WIN32) + message(STATUS "zlib: ${ZLIB_LIBRARY_RELEASE}") + # copies the dlls required to run to the build folder + foreach(LIB ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY_RELEASE} ${Boost_FILESYSTEM_LIBRARY_RELEASE} ${ZLIB_LIBRARY_RELEASE}) + get_filename_component(UTF_BASE_NAME ${LIB} NAME_WE) + get_filename_component(UTF_PATH ${LIB} PATH) + if(EXISTS 
"${UTF_PATH}/${UTF_BASE_NAME}.dll") + add_custom_command(TARGET unit_test_all POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} + ) + # zlib might be stored in a different folder + elseif(EXISTS "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll") + add_custom_command(TARGET unit_test_all POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} + ) + endif() + endforeach() + endif (WIN32) +endif(KEYVI_TESTS) # bindings add_custom_target(bindings @@ -226,10 +243,14 @@ target_include_directories(keyvi INTERFACE "$ target_compile_definitions(keyvi INTERFACE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) target_link_libraries(keyvi INTERFACE ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) +if (KEYVI_PYTHON_BINDINGS) + add_subdirectory(python-pybind) +endif () + ### docs # don't run it as part of a non-toplevel build, e.g. python -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/sphinx-docs) +if(KEYVI_DOCS) find_package(Doxygen) find_package(Sphinx COMPONENTS breathe) @@ -255,4 +276,4 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/sphinx-docs) else() message ("-- Skip doc target, doxygen/sphinx not found") endif() -endif() +endif(KEYVI_DOCS) diff --git a/keyvi/check-style.sh b/keyvi/check-style.sh index 13ae3a6cb..d6b3964c3 100755 --- a/keyvi/check-style.sh +++ b/keyvi/check-style.sh @@ -10,7 +10,7 @@ else commit_range="upstream/master...HEAD" fi -infiles=`git diff --name-only --diff-filter=ACMRT $(echo ${commit_range} | sed 's/\.//') | grep -v "3rdparty" | grep -E "\.(cpp|h)$"` +infiles=`git diff --name-only --diff-filter=ACMRT $(echo ${commit_range} | sed 's/\.//') | grep -v "3rdparty" | grep -v "pybind11" | grep -E "\.(cpp|h)$"` clang_format_files=() cpplint_files=() diff --git a/keyvi/include/keyvi/dictionary/match.h b/keyvi/include/keyvi/dictionary/match.h index db8b2a684..9586f7987 100644 --- 
a/keyvi/include/keyvi/dictionary/match.h +++ b/keyvi/include/keyvi/dictionary/match.h @@ -58,7 +58,7 @@ namespace dictionary { #ifdef Py_PYTHON_H class attributes_visitor : public boost::static_visitor { public: - PyObject* operator()(int i) const { return PyInt_FromLong(i); } + PyObject* operator()(int i) const { return PyLong_FromLong(i); } PyObject* operator()(double i) const { return PyFloat_FromDouble(i); } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..74074d82d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,80 @@ +[build-system] +requires = ["scikit-build-core>=0.10", "pybind11"] +build-backend = "scikit_build_core.build" + +[project] +name = "keyvi_scikit_core" +version = "0.6.3dev0" +dependencies = [ + "msgpack>=1.0.0", +] + +[project.optional-dependencies] +lint = [ + 'ruff>=0.5.0', +] +test = [ + 'pytest>=7.4.2', +] +dev = ['keyvi_scikit_core[lint,test]'] + +[tool.scikit-build] +wheel.expand-macos-universal-tags = true +minimum-version = "build-system.requires" + +[tool.scikit-build.cmake.define] +KEYVI_PYTHON_BINDINGS = "ON" +KEYVI_C_BINDINGS = "OFF" +KEYVI_TESTS = "OFF" +KEYVI_BINARIES = "OFF" +KEYVI_CLANG_TIDY = "OFF" +KEYVI_DOCS = "OFF" + +[tool.pytest.ini_options] +minversion = "7.1.1" +addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] +xfail_strict = true +log_cli_level = "INFO" +filterwarnings = [ + "error", + "ignore::pytest.PytestCacheWarning", +] +testpaths = ["python-pybind/tests"] + +[tool.cibuildwheel] +build-frontend = "default" +test-requires = "pytest" +test-command = "pytest {project}/python-pybind/tests" + +[tool.ruff.lint] +extend-select = [ + "B", # flake8-bugbear + "I", # isort + "ARG", # flake8-unused-arguments + "C4", # flake8-comprehensions + "EM", # flake8-errmsg + "ICN", # flake8-import-conventions + "G", # flake8-logging-format + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PL", # pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + "RET", # 
flake8-return + "RUF", # Ruff-specific + "SIM", # flake8-simplify + "T20", # flake8-print + "UP", # pyupgrade + "YTT", # flake8-2020 + "EXE", # flake8-executable + "NPY", # NumPy specific rules + "PD", # pandas-vet +] +ignore = [ + "PLR09", # Too many X + "PLR2004", # Magic comparison +] +isort.required-imports = ["from __future__ import annotations"] + +[tool.ruff.lint.per-file-ignores] +"python-pybind/tests/**" = ["T20"] diff --git a/python-pybind/CMakeLists.txt b/python-pybind/CMakeLists.txt new file mode 100644 index 000000000..04a8bfd6c --- /dev/null +++ b/python-pybind/CMakeLists.txt @@ -0,0 +1,26 @@ +cmake_minimum_required(VERSION 3.15...3.27) + +# Scikit-build-core sets these values for you, or you can just hard-code the +# name and version. +project( + ${SKBUILD_PROJECT_NAME} + VERSION ${SKBUILD_PROJECT_VERSION} + LANGUAGES CXX) + +# Find the module development requirements (requires FindPython from 3.17 or +# scikit-build-core's built-in backport) +find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) +find_package(pybind11 CONFIG REQUIRED) + +# Add a library using FindPython's tooling (pybind11 also provides a helper like +# this) +FILE(GLOB_RECURSE KEYVI_PYBIND_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} src/*.cpp) +pybind11_add_module(keyvi_scikit_core ${KEYVI_PYBIND_SOURCES}) + +target_link_libraries(keyvi_scikit_core PRIVATE keyvi) + +# This is passing in the version as a define just as an example +target_compile_definitions(keyvi_scikit_core PRIVATE VERSION_INFO=${PROJECT_VERSION}) + +# The install directory is the output (wheel) directory +install(TARGETS keyvi_scikit_core DESTINATION .) 
diff --git a/python-pybind/src/.clang-format b/python-pybind/src/.clang-format new file mode 100644 index 000000000..ab84a2c7b --- /dev/null +++ b/python-pybind/src/.clang-format @@ -0,0 +1,12 @@ +--- +BasedOnStyle: Google +ColumnLimit: '120' +Language: Cpp +Standard: c++17 +TabWidth: '2' +UseTab: Never +ConstructorInitializerIndentWidth: 4 +AllowShortFunctionsOnASingleLine: Inline +IncludeBlocks: Preserve + +... diff --git a/python-pybind/src/CPPLINT.cfg b/python-pybind/src/CPPLINT.cfg new file mode 100644 index 000000000..6ed77f519 --- /dev/null +++ b/python-pybind/src/CPPLINT.cfg @@ -0,0 +1,3 @@ +linelength=120 +root=. +filter=-build/include_subdir,-whitespace/indent_namespace diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp new file mode 100644 index 000000000..0f0774089 --- /dev/null +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -0,0 +1,121 @@ +/* keyvi - A key value store. + * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +#include "keyvi/dictionary/dictionary_types.h" + +namespace py = pybind11; +namespace kd = keyvi::dictionary; + +template +inline void py_compile(Compiler *c, std::function progress_callback) { + if (progress_callback == nullptr) { + c->Compile(); + return; + } + auto progress_compiler_callback = [](size_t a, size_t b, void *user_data) { + auto py_callback = *reinterpret_cast *>(user_data); + py_callback(a, b); + }; + void *user_data = reinterpret_cast(&progress_callback); + c->Compile(progress_compiler_callback, user_data); +} + +void init_keyvi_dictionary_compilers(const py::module_ &module) { +#define CREATE_COMPILER(compiler, name) \ + py::class_(module, name) \ + .def(py::init<>()) \ + .def(py::init()) \ + .def("__enter__", [](compiler &c) { return &c; }) \ + .def("__exit__", [](compiler &c, void *exc_type, void *exc_value, void *traceback) { c.Compile(); }) \ + .def("__setitem__", &compiler::Add) \ + .def("add", &compiler::Add) /* DEPRECATED */ \ + .def("Add", &compiler::Add) \ + .def( \ + "compile", \ + [](compiler &c, std::function progress_callback) { \ + py_compile(&c, progress_callback); \ + }, \ + py::arg("progress_callback") = \ + static_cast *>(nullptr)) /* DEPRECATED */ \ + .def( \ + "Compile", \ + [](compiler &c, std::function progress_callback) { \ + py_compile(&c, progress_callback); \ + }, \ + py::arg("progress_callback") = static_cast *>(nullptr)) \ + .def("set_manifest", &compiler::SetManifest) \ + .def("write_to_file", &compiler::WriteToFile, py::call_guard()) /* DEPRECATED */ \ + .def("WriteToFile", &compiler::WriteToFile, py::call_guard()); +#define CREATE_SK_COMPILER(compiler, name) \ + py::class_(module, name) \ + .def(py::init &>()) \ + .def(py::init &, const keyvi::util::parameters_t &>()) \ + .def("__enter__", [](compiler &c) { return &c; }) \ + .def("__exit__", [](compiler &c, void *exc_type, void *exc_value, void *traceback) { c.Compile(); }) \ + .def("__setitem__", 
&compiler::Add) \ + .def("add", &compiler::Add) \ + .def( \ + "compile", \ + [](compiler &c, std::function progress_callback) { \ + py_compile(&c, progress_callback); \ + }, \ + py::arg("progress_callback") = static_cast *>(nullptr)) \ + .def("set_manifest", &compiler::SetManifest) \ + .def("write_to_file", &compiler::WriteToFile, py::call_guard()); +#define CREATE_MERGER(merger, name) \ + py::class_(module, name) \ + .def(py::init<>()) \ + .def(py::init()) \ + .def("__enter__", [](merger &m) { return &m; }) \ + .def("__exit__", [](merger &m, void *exc_type, void *exc_value, void *traceback) { m.Merge(); }) \ + .def("add", &merger::Add) \ + .def("merge", \ + [](merger &m) { \ + pybind11::gil_scoped_release release_gil; \ + m.Merge(); \ + }) \ + .def("merge", \ + [](merger &m, const std::string &filename) { \ + pybind11::gil_scoped_release release_gil; \ + m.Merge(filename); \ + }) \ + .def("set_manifest", &merger::SetManifest) \ + .def("write_to_file", &merger::WriteToFile, py::call_guard()); + CREATE_COMPILER(kd::CompletionDictionaryCompiler, "CompletionDictionaryCompiler"); + CREATE_COMPILER(kd::FloatVectorDictionaryCompiler, "FloatVectorDictionaryCompiler"); + CREATE_COMPILER(kd::IntDictionaryCompiler, "IntDictionaryCompiler"); + CREATE_COMPILER(kd::JsonDictionaryCompiler, "JsonDictionaryCompiler"); + CREATE_COMPILER(kd::KeyOnlyDictionaryCompiler, "KeyOnlyDictionaryCompiler"); + CREATE_COMPILER(kd::StringDictionaryCompiler, "StringDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyCompletionDictionaryCompiler, "SecondaryKeyCompletionDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyFloatVectorDictionaryCompiler, "SecondaryKeyFloatVectorDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyIntDictionaryCompiler, "SecondaryKeyIntDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyJsonDictionaryCompiler, "SecondaryKeyJsonDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyKeyOnlyDictionaryCompiler, 
"SecondaryKeyKeyOnlyDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyStringDictionaryCompiler, "SecondaryKeyStringDictionaryCompiler"); + CREATE_MERGER(kd::CompletionDictionaryMerger, "CompletionDictionaryMerger"); + CREATE_MERGER(kd::IntDictionaryMerger, "IntDictionaryMerger"); + +#undef CREATE_COMPILER +} diff --git a/python-pybind/src/dictionary/py_dictionary.cpp b/python-pybind/src/dictionary/py_dictionary.cpp new file mode 100644 index 000000000..2333bb4a5 --- /dev/null +++ b/python-pybind/src/dictionary/py_dictionary.cpp @@ -0,0 +1,132 @@ +/* keyvi - A key value store. + * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include "keyvi/dictionary/dictionary.h" +#include "keyvi/dictionary/match.h" + +#include "py_match_iterator.h" + +namespace py = pybind11; +namespace kd = keyvi::dictionary; +namespace kpy = keyvi::pybind; + +void init_keyvi_dictionary(const py::module_ &m) { + m.doc() = R"pbdoc( + keyvi.dictionary + ----------------------- + + .. currentmodule:: keyvi.dictionary + + .. 
autosummary:: + :toctree: _generate + + )pbdoc"; + + // TODO(hendrik): 'items', 'keys', 'manifest', 'match_fuzzy', 'match_near', + // 'search_tokenized', 'statistics', 'values' + py::class_(m, "Dictionary") + .def(py::init()) + .def(py::init()) + .def( + "complete_fuzzy_multiword", + [](const kd::Dictionary &d, const std::string &query, const int32_t max_edit_distance, + const size_t minimum_exact_prefix = 0, const unsigned char multiword_separator = 0x1b) { + auto m = d.GetFuzzyMultiwordCompletion(query, max_edit_distance, minimum_exact_prefix, multiword_separator); + return kpy::make_match_iterator(m.begin(), m.end()); + }, + py::arg("query"), py::arg("max_edit_distance"), py::arg("minimum_exact_prefix") = 0, + py::arg("multiword_separator") = 0x1b, + R"pbdoc(Complete the given key to full matches after whitespace tokenizing, + allowing up to max_edit_distance distance(Levenshtein). + In case the used dictionary supports inner weights, the + completer traverses the dictionary according to weights, + otherwise byte-order. + )pbdoc") + .def( + "complete_multiword", + [](const kd::Dictionary &d, const std::string &query, const unsigned char multiword_separator = 0x1b) { + auto m = d.GetMultiwordCompletion(query, multiword_separator); + return kpy::make_match_iterator(m.begin(), m.end()); + }, + py::arg("query"), py::arg("multiword_separator") = 0x1b, + R"pbdoc(Complete the given key to full matches after whitespace tokenizing + and return the top n completions. + In case the used dictionary supports inner weights, the + completer traverses the dictionary according to weights, + otherwise byte-order. + + Note, due to depth-first traversal the traverser + immediately yields results when it visits them. The results are + neither in order nor limited to n. It is up to the caller to resort + and truncate the lists of results. + Only the number of top completions is guaranteed. 
+ )pbdoc") + .def( + "complete_prefix", + [](const kd::Dictionary &d, const std::string &query) { + auto m = d.GetPrefixCompletion(query); + return kpy::make_match_iterator(m.begin(), m.end()); + }, + py::arg("query"), + R"pbdoc(Complete the given key to full matches after whitespace tokenizing + and return the top n completions. + In case the used dictionary supports inner weights, the + completer traverses the dictionary according to weights, + otherwise byte-order. + + Note, due to depth-first traversal the traverser + immediately yields results when it visits them. The results are + neither in order nor limited to n. It is up to the caller to resort + and truncate the lists of results. + Only the number of top completions is guaranteed. + )pbdoc") + .def( + "complete_prefix", + [](const kd::Dictionary &d, const std::string &query, size_t top_n) { + auto m = d.GetPrefixCompletion(query, top_n); + return kpy::make_match_iterator(m.begin(), m.end()); + }, + py::arg("query"), py::arg("top_n"), + R"pbdoc(Complete the given key to full matches after whitespace tokenizing + and return the top n completions. + In case the used dictionary supports inner weights, the + completer traverses the dictionary according to weights, + otherwise byte-order. + + Note, due to depth-first traversal the traverser + immediately yields results when it visits them. The results are + neither in order nor limited to n. It is up to the caller to resort + and truncate the lists of results. + Only the number of top completions is guaranteed. + )pbdoc") + .def("get", &kd::Dictionary::operator[], R"pbdoc( + Get an entry from the dictionary. + )pbdoc") + .def("__getitem__", &kd::Dictionary::operator[], R"pbdoc( + Get an entry from the dictionary. 
+ )pbdoc") + .def("match", + [](const kd::Dictionary &d, const std::string &key) { + auto m = d.Get(key); + return kpy::make_match_iterator(m.begin(), m.end()); + }) + .def("search", &kd::Dictionary::Lookup); +} diff --git a/python-pybind/src/dictionary/py_match.cpp b/python-pybind/src/dictionary/py_match.cpp new file mode 100644 index 000000000..f9b610676 --- /dev/null +++ b/python-pybind/src/dictionary/py_match.cpp @@ -0,0 +1,61 @@ +/* * keyvi - A key value store. + * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include "keyvi/dictionary/dictionary.h" +#include "keyvi/dictionary/match.h" + +#include "py_match_iterator.h" + +namespace py = pybind11; +namespace kd = keyvi::dictionary; + +inline const py::object &get_msgpack_loads_func() { + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store storage; + return storage + .call_once_and_store_result([]() -> py::object { return py::getattr(py::module_::import("msgpack"), "loads"); }) + .get_stored(); +} + +void init_keyvi_match(const py::module_ &m) { + py::module_ msgpack_ = py::module_::import("msgpack"); + + py::class_>(m, "Match") + .def(py::init<>()) + .def_property("start", &kd::Match::GetStart, &kd::Match::SetStart) + .def_property("end", &kd::Match::GetEnd, &kd::Match::SetEnd) + .def_property("score", &kd::Match::GetScore, &kd::Match::SetScore) + .def_property("matched_string", &kd::Match::GetMatchedString, &kd::Match::SetMatchedString) + .def_property_readonly("value", + [](const kd::Match &m) -> py::object { + auto packed_value = m.GetMsgPackedValueAsString(); + if (packed_value.empty()) { + return py::none(); + } + return get_msgpack_loads_func()(py::bytes(packed_value)); + }) + .def("value_as_string", &kd::Match::GetValueAsString) + .def("raw_value_as_string", &kd::Match::GetRawValueAsString) + .def("__getitem__", &kd::Match::GetAttributePy) + // __setitem__ + // dumps loads + .def_property_readonly("weight", &kd::Match::GetWeight) + .def("__bool__", [](const kd::Match &m) -> bool { return !m.IsEmpty(); }); +} diff --git a/python-pybind/src/dictionary/py_match_iterator.h b/python-pybind/src/dictionary/py_match_iterator.h new file mode 100644 index 000000000..21500d931 --- /dev/null +++ b/python-pybind/src/dictionary/py_match_iterator.h @@ -0,0 +1,75 @@ +/* * keyvi - A key value store. + * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DICTIONARY_PY_MATCH_ITERATOR_H_
+#define DICTIONARY_PY_MATCH_ITERATOR_H_
+
+#include
+
+#include
+
+namespace keyvi {
+namespace pybind {
+// Wraps [first, last) in a Python iterator object. Registers the backing module-local Python type "iterator"
+// (__iter__, __next__, set_min_weight) if it is not registered yet. Adapted from pybind11.h.
+template
+pybind11::iterator make_match_iterator_impl(Iterator first, Sentinel last, Extra &&...extra) {
+  using state = pybind11::detail::iterator_state;
+  if (!pybind11::detail::get_type_info(typeid(state), false)) {
+    pybind11::class_(pybind11::handle(), "iterator", pybind11::module_local())
+        .def("__iter__", [](state &s) -> state & { return s; })
+        .def(
+            "__next__",
+            [](state &s) -> ValueType {
+              {
+                // release GIL as incrementing the iterator can be expensive, e.g. for fuzzy match
+                pybind11::gil_scoped_release no_gil;
+                if (!s.first_or_done) {  // not the first call: advance
+                  ++s.it;
+                } else {
+                  s.first_or_done = false;  // first call: use the initial position as is
+                }
+                if (s.it == s.end) {
+                  s.first_or_done = true;  // exhausted: a repeated call must not increment past the end
+                  throw pybind11::stop_iteration();
+                }
+              }
+              // no_gil went out of scope above; the current element is accessed outside the released section
+              return Access()(s.it);
+            },
+            std::forward(extra)..., Policy)
+        .def("set_min_weight", [](state &s, const uint32_t min_weight) -> void { s.it.SetMinWeight(min_weight); });
+  }
+
+  return pybind11::cast(state{std::forward(first), std::forward(last), true});
+}
+
+/// Makes a python iterator from a first and past-the-end C++ InputIterator.
+template ::result_type,
+          typename... 
Extra>
+pybind11::typing::Iterator make_match_iterator(Iterator first, Sentinel last, Extra &&...extra) {
+  return make_match_iterator_impl, Policy, Iterator, Sentinel, ValueType,
+                                  Extra...>(std::forward(first), std::forward(last),
+                                            std::forward(extra)...);
+}
+
+} /* namespace pybind */
+} /* namespace keyvi */
+
+#endif  // DICTIONARY_PY_MATCH_ITERATOR_H_
diff --git a/python-pybind/src/py_keyvi.cpp b/python-pybind/src/py_keyvi.cpp
new file mode 100644
index 000000000..d10138cbb
--- /dev/null
+++ b/python-pybind/src/py_keyvi.cpp
@@ -0,0 +1,66 @@
+/* * keyvi - A key value store.
+ *
+ * Copyright 2015 Hendrik Muhs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include "keyvi/dictionary/fsa/internal/memory_map_flags.h"
+
+#define STRINGIFY(x) #x
+#define MACRO_STRINGIFY(x) STRINGIFY(x)  // extra level so the argument is macro-expanded before #x is applied
+
+namespace py = pybind11;
+namespace kd = keyvi::dictionary;
+// registration functions for the individual binding groups; declared only, not defined in this file
+void init_keyvi_dictionary(const py::module_ &);
+void init_keyvi_dictionary_compilers(const py::module_ &);
+void init_keyvi_match(const py::module_ &);
+// Entry point of module keyvi_scikit_core: docstring, loading_strategy_types enum, Match, two submodules, version.
+PYBIND11_MODULE(keyvi_scikit_core, m) {
+  m.doc() = R"pbdoc(
+        keyvi - a key value store.
+        -----------------------
+
+        .. currentmodule:: keyvi
+
+        .. autosummary::
+           :toctree: _generate
+
+    )pbdoc";
+
+  py::enum_(m, "loading_strategy_types")
+      .value("default_os", kd::loading_strategy_types::default_os)
+      .value("lazy", kd::loading_strategy_types::lazy)
+      .value("populate", kd::loading_strategy_types::populate)
+      .value("populate_key_part", kd::loading_strategy_types::populate_key_part)
+      .value("populate_lazy", kd::loading_strategy_types::populate_lazy)
+      .value("lazy_no_readahead", kd::loading_strategy_types::lazy_no_readahead)
+      .value("lazy_no_readahead_value_part", kd::loading_strategy_types::lazy_no_readahead_value_part)
+      .value("populate_key_part_no_readahead_value_part",
+             kd::loading_strategy_types::populate_key_part_no_readahead_value_part);
+
+  init_keyvi_match(m);  // Match is registered on the top-level module
+  py::module keyvi_dictionary = m.def_submodule("dictionary", "keyvi_scikit_core.dictionary");
+  init_keyvi_dictionary(keyvi_dictionary);
+  py::module keyvi_compilers = m.def_submodule("compiler", "keyvi_scikit_core.compiler");
+  init_keyvi_dictionary_compilers(keyvi_compilers);
+
+#ifdef VERSION_INFO
+  m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO);
+#else
+  m.attr("__version__") = "dev";  // VERSION_INFO was not defined at compile time
+#endif
+}
diff --git a/python-pybind/tests/match_object_test.py b/python-pybind/tests/match_object_test.py
new file mode 100644
index 000000000..b1976a3be
--- /dev/null
+++ b/python-pybind/tests/match_object_test.py
@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+# Usage: py.test tests
+
+import keyvi_scikit_core as keyvi
+from test_tools import tmp_dictionary
+import warnings
+
+
+from keyvi_scikit_core.compiler import (
+    JsonDictionaryCompiler,
+    CompletionDictionaryCompiler,
+    KeyOnlyDictionaryCompiler,
+    StringDictionaryCompiler,
+)
+
+
+""" def test_serialization():
+    m = keyvi.Match()
+    m.start = 22
+    m.end = 30
+    m.score = 42
+    d = m.dumps()
+    m2 = keyvi.Match.loads(d)
+    assert m2.start == 22
+    assert m2.end == 30
+    assert m2.score == 42 """
+
+
+""" def test_raw_serialization():
+    c = JsonDictionaryCompiler({"memory_limit_mb": "10"})
+    
c.Add("abc", '{"a" : 2}')
+    c.Add("abd", '{"a" : 3}')
+    with tmp_dictionary(c, 'match_object_json.kv') as d:
+        m = d["abc"]
+        assert m.value_as_string() == '{"a":2}'
+        d = m.dumps()
+        m2 = keyvi.Match.loads(d)
+        assert m2.value_as_string() == '{"a":2}'
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            assert m.GetValueAsString() == '{"a":2}'
+            assert len(w) == 1
+            assert issubclass(w[-1].category, DeprecationWarning)
+    """
+
+""" def test_unicode_attributes():
+    m = keyvi.Match()
+    m["küy"] = 22
+    assert m["küy"] == 22
+    m["k2"] = " 吃饭了吗"
+    m.score = 99
+    assert m["k2"] == " 吃饭了吗"
+    assert m.score == 99.0
+    with warnings.catch_warnings(record=True) as w:
+        warnings.simplefilter("always")
+        m.SetAttribute("k2", "öäü")
+        assert m["k2"] == "öäü"
+        assert m.GetAttribute("k2") == "öäü"
+        assert len(w) == 2
+        assert issubclass(w[0].category, DeprecationWarning)
+        assert issubclass(w[1].category, DeprecationWarning) """
+
+
+""" def test_bytes_attributes():
+    m = keyvi.Match()
+    bytes_key = bytes(u"äöü".encode('utf-8'))
+    bytes_value = bytes(u"äöüöäü".encode('utf-8'))
+    m[bytes_key] = 22
+    assert m[bytes_key] == 22
+    m["k2"] = bytes_value
+    assert m["k2"] == "äöüöäü"
+
+
+def test_double_attributes():
+    m = keyvi.Match()
+    bytes_key = bytes("abc".encode('utf-8'))
+    m[bytes_key] = 42.0
+    assert m[bytes_key] == 42.0
+
+
+def test_boolean_attributes():
+    m = keyvi.Match()
+    bytes_key = bytes("def".encode('utf-8'))
+    m[bytes_key] = True
+    assert m[bytes_key] == True """
+
+
+def test_start():  # a value assigned to Match.start is read back unchanged
+    match = keyvi.Match()
+    match.start = 42
+    assert match.start == 42
+
+
+def test_end():  # a value assigned to Match.end is read back unchanged
+    match = keyvi.Match()
+    match.end = 49
+    assert match.end == 49
+
+
+def test_score():  # a value assigned to Match.score is read back unchanged
+    match = keyvi.Match()
+    match.score = 149
+    assert match.score == 149
+
+
+""" def test_get_value():
+    c = JsonDictionaryCompiler({"memory_limit_mb": "10"})
+    c.Add("abc", '{"a" : 2}')
+    c.Add("abd", '{"a" : 3}')
+    with tmp_dictionary(c, 'match_object_json.kv') as d:
+        m = d["abc"]
+        assert m.value == {"a": 
2}
+        m = d["abd"]
+        assert m.value == {"a": 3} """
+
+
+def test_get_value_int():
+    c = CompletionDictionaryCompiler({"memory_limit_mb": "10"})
+    c.Add("abc", 42)
+    c.Add("abd", 21)
+    with tmp_dictionary(c, 'match_object_int.kv') as d:
+        m = d["abc"]
+        assert m.value == 42
+        m = d["abd"]
+        assert m.value == 21
+
+
+""" def test_get_value_key_only():
+    c = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"})
+    c.Add("abc")
+    c.Add("abd")
+    with tmp_dictionary(c, 'match_object_key_only.kv') as d:
+        m = d["abc"]
+        assert m.value == ''
+        m = d["abd"]
+        assert m.value == ''
+    """
+
+def test_get_value_string():
+    c = StringDictionaryCompiler({"memory_limit_mb": "10"})
+    c.Add("abc", "aaaaa")
+    c.Add("abd", "bbbbb")
+    with tmp_dictionary(c, 'match_object_string.kv') as d:
+        m = d["abc"]
+        assert m.value == "aaaaa"
+        m = d["abd"]
+        assert m.value == "bbbbb"
+
+
+def test_matched_string():
+    m = keyvi.Match()
+    m.matched_string = "match"
+    assert m.matched_string == "match"
+
+
+def test_bool_operator():
+    m = keyvi.Match()
+    assert not m
+    m.end = 42
+    assert m is not False
+    assert m
diff --git a/python-pybind/tests/test_tools.py b/python-pybind/tests/test_tools.py
new file mode 100644
index 000000000..98bf058d4
--- /dev/null
+++ b/python-pybind/tests/test_tools.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# some common tools for tests
+
+import contextlib
+import os
+import tempfile
+
+from keyvi_scikit_core.dictionary import Dictionary
+
+
+@contextlib.contextmanager
+def tmp_dictionary(compiler, file_name):
+    """Compile `compiler` to a file in the temp directory and yield it as a Dictionary.
+
+    The file is written to tempfile.gettempdir()/file_name and loaded with
+    Dictionary. It is removed when the with-block ends, including when the
+    block or the Dictionary constructor raises.
+    """
+    tmp_dir = tempfile.gettempdir()
+    fq_file_name = os.path.join(tmp_dir, file_name)
+    compiler.Compile()
+    compiler.WriteToFile(fq_file_name)
+    del compiler
+    d = None
+    try:
+        d = Dictionary(fq_file_name)
+        yield d
+    finally:
+        # drop the local reference before unlinking, in the same order as before
+        del d
+        os.remove(fq_file_name)
diff --git a/python/integration-tests/compiler/var_length_short_calculation_test.py b/python/integration-tests/compiler/var_length_short_calculation_test.py
index 590c63017..c04f21abc 100644
--- 
a/python/integration-tests/compiler/var_length_short_calculation_test.py +++ b/python/integration-tests/compiler/var_length_short_calculation_test.py @@ -26,7 +26,7 @@ def test_input_output_keys(): output_keys_count = 0 with tmp_dictionary(compiler, 'var_length_short_test.kv') as d: - for _ in d.GetAllItems(): + for _ in d.items(): output_keys_count += 1 assert input_keys_count == output_keys_count diff --git a/python/tests/dictionary/loading_test.py b/python/tests/dictionary/loading_test.py index ce83e8698..092ed18ff 100644 --- a/python/tests/dictionary/loading_test.py +++ b/python/tests/dictionary/loading_test.py @@ -39,14 +39,14 @@ def test_truncated_file_json(): c.WriteToFile(os.path.join(tmp_dir,'truncation_test.kv')) size = os.path.getsize(os.path.join(tmp_dir, 'truncation_test.kv')) - fd_in = open(os.path.join(tmp_dir,'truncation_test.kv'), 'rb') - fd = open(os.path.join(tmp_dir,'truncation_test1.kv'), 'wb') - fd.write(fd_in.read(int(size/2))) - fd.close() - - fd2 = open(os.path.join(tmp_dir,'truncation_test2.kv'), 'wb') - fd2.write(fd_in.read(int(size-2))) - fd2.close() + with open(os.path.join(tmp_dir,'truncation_test.kv'), 'rb') as fd_in: + fd = open(os.path.join(tmp_dir,'truncation_test1.kv'), 'wb') + fd.write(fd_in.read(int(size/2))) + fd.close() + + fd2 = open(os.path.join(tmp_dir,'truncation_test2.kv'), 'wb') + fd2.write(fd_in.read(int(size-2))) + fd2.close() with pytest.raises(ValueError): d=Dictionary(os.path.join(tmp_dir, 'truncation_test1.kv')) diff --git a/python/tests/index/merger_binary_test.py b/python/tests/index/merger_binary_test.py index ad082268b..b894370e7 100644 --- a/python/tests/index/merger_binary_test.py +++ b/python/tests/index/merger_binary_test.py @@ -10,5 +10,5 @@ def test_merger_binary(): cmd = get_interpreter_executable() + b" " + os.path.join(get_package_root(), b"_pycore" , b"keyvimerger.py") + b" -h" - rc = subprocess.call(cmd, shell=True, stdout=open(os.devnull, 'w')) + rc = subprocess.call(cmd, shell=True) assert rc == 0