From 103101055a77ef5ed9f5c40797dfc6e51ad4cf75 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg
Date: Fri, 10 Jan 2025 12:57:01 +0000
Subject: [PATCH 01/11] Add Elasticsearch-DSL code

---
 elasticsearch/dsl/__init__.py | 206 +
 elasticsearch/dsl/_async/__init__.py | 16 +
 elasticsearch/dsl/_async/document.py | 521 ++
 elasticsearch/dsl/_async/faceted_search.py | 51 +
 elasticsearch/dsl/_async/index.py | 638 ++
 elasticsearch/dsl/_async/mapping.py | 49 +
 elasticsearch/dsl/_async/search.py | 232 +
 elasticsearch/dsl/_async/update_by_query.py | 47 +
 elasticsearch/dsl/_sync/__init__.py | 16 +
 elasticsearch/dsl/_sync/document.py | 513 ++
 elasticsearch/dsl/_sync/faceted_search.py | 51 +
 elasticsearch/dsl/_sync/index.py | 596 ++
 elasticsearch/dsl/_sync/mapping.py | 49 +
 elasticsearch/dsl/_sync/search.py | 217 +
 elasticsearch/dsl/_sync/update_by_query.py | 45 +
 elasticsearch/dsl/aggs.py | 3731 ++++++++++
 elasticsearch/dsl/analysis.py | 341 +
 elasticsearch/dsl/async_connections.py | 37 +
 elasticsearch/dsl/connections.py | 144 +
 elasticsearch/dsl/document.py | 20 +
 elasticsearch/dsl/document_base.py | 444 ++
 elasticsearch/dsl/exceptions.py | 32 +
 elasticsearch/dsl/faceted_search.py | 28 +
 elasticsearch/dsl/faceted_search_base.py | 489 ++
 elasticsearch/dsl/field.py | 587 ++
 elasticsearch/dsl/function.py | 180 +
 elasticsearch/dsl/index.py | 23 +
 elasticsearch/dsl/index_base.py | 178 +
 elasticsearch/dsl/mapping.py | 19 +
 elasticsearch/dsl/mapping_base.py | 219 +
 elasticsearch/dsl/py.typed | 0
 elasticsearch/dsl/query.py | 2795 ++++
 elasticsearch/dsl/response/__init__.py | 354 +
 elasticsearch/dsl/response/aggs.py | 100 +
 elasticsearch/dsl/response/hit.py | 53 +
 elasticsearch/dsl/search.py | 28 +
 elasticsearch/dsl/search_base.py | 1040 +++
 elasticsearch/dsl/serializer.py | 34 +
 elasticsearch/dsl/types.py | 6272 +++++++++++++++++
 elasticsearch/dsl/update_by_query.py | 19 +
 elasticsearch/dsl/update_by_query_base.py | 149 +
 elasticsearch/dsl/utils.py | 686 ++
 elasticsearch/dsl/wrappers.py | 119 +
 examples/dsl/README.rst | 47 +
 examples/dsl/alias_migration.py | 161 +
 examples/dsl/async/alias_migration.py | 162 +
 examples/dsl/async/completion.py | 114 +
 examples/dsl/async/composite_agg.py | 94 +
 examples/dsl/async/parent_child.py | 276 +
 examples/dsl/async/percolate.py | 117 +
 examples/dsl/async/search_as_you_type.py | 99 +
 examples/dsl/async/semantic_text.py | 148 +
 examples/dsl/async/sparse_vectors.py | 198 +
 examples/dsl/async/vectors.py | 187 +
 examples/dsl/completion.py | 113 +
 examples/dsl/composite_agg.py | 91 +
 examples/dsl/parent_child.py | 275 +
 examples/dsl/percolate.py | 116 +
 examples/dsl/search_as_you_type.py | 93 +
 examples/dsl/semantic_text.py | 147 +
 examples/dsl/sparse_vectors.py | 197 +
 examples/dsl/vectors.py | 186 +
 pyproject.toml | 21 +-
 test_elasticsearch/test_dsl/__init__.py | 16 +
 .../test_dsl/_async/__init__.py | 16 +
 .../test_dsl/_async/test_document.py | 883 +++
 .../test_dsl/_async/test_faceted_search.py | 201 +
 .../test_dsl/_async/test_index.py | 197 +
 .../test_dsl/_async/test_mapping.py | 222 +
 .../test_dsl/_async/test_search.py | 841 +++
 .../test_dsl/_async/test_update_by_query.py | 180 +
 test_elasticsearch/test_dsl/_sync/__init__.py | 16 +
 .../test_dsl/_sync/test_document.py | 883 +++
 .../test_dsl/_sync/test_faceted_search.py | 201 +
 .../test_dsl/_sync/test_index.py | 190 +
 .../test_dsl/_sync/test_mapping.py | 222 +
 .../test_dsl/_sync/test_search.py | 831 +++
 .../test_dsl/_sync/test_update_by_query.py | 180 +
 test_elasticsearch/test_dsl/async_sleep.py | 24 +
 test_elasticsearch/test_dsl/conftest.py | 486 ++
 test_elasticsearch/test_dsl/sleep.py | 24 +
 test_elasticsearch/test_dsl/test_aggs.py | 530 ++
 test_elasticsearch/test_dsl/test_analysis.py | 216 +
 .../test_dsl/test_connections.py | 143 +
 test_elasticsearch/test_dsl/test_field.py | 234 +
 .../test_dsl/test_integration/__init__.py | 16 +
 .../test_integration/_async/__init__.py | 16 +
 .../test_integration/_async/test_analysis.py | 54 +
 .../test_integration/_async/test_document.py | 852 +++
 .../_async/test_faceted_search.py | 305 +
 .../test_integration/_async/test_index.py | 162 +
 .../test_integration/_async/test_mapping.py | 171 +
 .../test_integration/_async/test_search.py | 304 +
 .../_async/test_update_by_query.py | 85 +
 .../test_integration/_sync/__init__.py | 16 +
 .../test_integration/_sync/test_analysis.py | 54 +
 .../test_integration/_sync/test_document.py | 844 +++
 .../_sync/test_faceted_search.py | 305 +
 .../test_integration/_sync/test_index.py | 160 +
 .../test_integration/_sync/test_mapping.py | 169 +
 .../test_integration/_sync/test_search.py | 294 +
 .../_sync/test_update_by_query.py | 85 +
 .../test_dsl/test_integration/test_count.py | 46 +
 .../test_dsl/test_integration/test_data.py | 1093 +++
 .../test_examples/__init__.py | 16 +
 .../test_examples/_async/__init__.py | 16 +
 .../_async/test_alias_migration.py | 73 +
 .../test_examples/_async/test_completion.py | 39 +
 .../_async/test_composite_aggs.py | 57 +
 .../test_examples/_async/test_parent_child.py | 116 +
 .../test_examples/_async/test_percolate.py | 37 +
 .../test_examples/_async/test_vectors.py | 56 +
 .../test_examples/_sync/__init__.py | 16 +
 .../_sync/test_alias_migration.py | 73 +
 .../test_examples/_sync/test_completion.py | 39 +
 .../_sync/test_composite_aggs.py | 57 +
 .../test_examples/_sync/test_parent_child.py | 111 +
 .../test_examples/_sync/test_percolate.py | 37 +
 .../test_examples/_sync/test_vectors.py | 56 +
 .../test_examples/async_examples | 1 +
 .../test_integration/test_examples/examples | 1 +
 test_elasticsearch/test_dsl/test_package.py | 22 +
 test_elasticsearch/test_dsl/test_query.py | 671 ++
 test_elasticsearch/test_dsl/test_result.py | 215 +
 test_elasticsearch/test_dsl/test_utils.py | 136 +
 .../test_dsl/test_validation.py | 162 +
 test_elasticsearch/test_dsl/test_wrappers.py | 111 +
 127 files changed, 38094 insertions(+), 1 deletion(-)
 create mode 100644 elasticsearch/dsl/__init__.py
 create mode 100644 elasticsearch/dsl/_async/__init__.py
 create mode 100644 elasticsearch/dsl/_async/document.py
 create mode 100644 elasticsearch/dsl/_async/faceted_search.py
 create mode 100644 elasticsearch/dsl/_async/index.py
 create mode 100644 elasticsearch/dsl/_async/mapping.py
 create mode 100644 elasticsearch/dsl/_async/search.py
 create mode 100644 elasticsearch/dsl/_async/update_by_query.py
 create mode 100644 elasticsearch/dsl/_sync/__init__.py
 create mode 100644 elasticsearch/dsl/_sync/document.py
 create mode 100644 elasticsearch/dsl/_sync/faceted_search.py
 create mode 100644 elasticsearch/dsl/_sync/index.py
 create mode 100644 elasticsearch/dsl/_sync/mapping.py
 create mode 100644 elasticsearch/dsl/_sync/search.py
 create mode 100644 elasticsearch/dsl/_sync/update_by_query.py
 create mode 100644 elasticsearch/dsl/aggs.py
 create mode 100644 elasticsearch/dsl/analysis.py
 create mode 100644 elasticsearch/dsl/async_connections.py
 create mode 100644 elasticsearch/dsl/connections.py
 create mode 100644 elasticsearch/dsl/document.py
 create mode 100644 elasticsearch/dsl/document_base.py
 create mode 100644 elasticsearch/dsl/exceptions.py
 create mode
100644 elasticsearch/dsl/faceted_search.py create mode 100644 elasticsearch/dsl/faceted_search_base.py create mode 100644 elasticsearch/dsl/field.py create mode 100644 elasticsearch/dsl/function.py create mode 100644 elasticsearch/dsl/index.py create mode 100644 elasticsearch/dsl/index_base.py create mode 100644 elasticsearch/dsl/mapping.py create mode 100644 elasticsearch/dsl/mapping_base.py create mode 100644 elasticsearch/dsl/py.typed create mode 100644 elasticsearch/dsl/query.py create mode 100644 elasticsearch/dsl/response/__init__.py create mode 100644 elasticsearch/dsl/response/aggs.py create mode 100644 elasticsearch/dsl/response/hit.py create mode 100644 elasticsearch/dsl/search.py create mode 100644 elasticsearch/dsl/search_base.py create mode 100644 elasticsearch/dsl/serializer.py create mode 100644 elasticsearch/dsl/types.py create mode 100644 elasticsearch/dsl/update_by_query.py create mode 100644 elasticsearch/dsl/update_by_query_base.py create mode 100644 elasticsearch/dsl/utils.py create mode 100644 elasticsearch/dsl/wrappers.py create mode 100644 examples/dsl/README.rst create mode 100644 examples/dsl/alias_migration.py create mode 100644 examples/dsl/async/alias_migration.py create mode 100644 examples/dsl/async/completion.py create mode 100644 examples/dsl/async/composite_agg.py create mode 100644 examples/dsl/async/parent_child.py create mode 100644 examples/dsl/async/percolate.py create mode 100644 examples/dsl/async/search_as_you_type.py create mode 100644 examples/dsl/async/semantic_text.py create mode 100644 examples/dsl/async/sparse_vectors.py create mode 100644 examples/dsl/async/vectors.py create mode 100644 examples/dsl/completion.py create mode 100644 examples/dsl/composite_agg.py create mode 100644 examples/dsl/parent_child.py create mode 100644 examples/dsl/percolate.py create mode 100644 examples/dsl/search_as_you_type.py create mode 100644 examples/dsl/semantic_text.py create mode 100644 examples/dsl/sparse_vectors.py create mode 100644 examples/dsl/vectors.py create mode 100644 test_elasticsearch/test_dsl/__init__.py create mode 100644 test_elasticsearch/test_dsl/_async/__init__.py create mode 100644 test_elasticsearch/test_dsl/_async/test_document.py create mode 100644 test_elasticsearch/test_dsl/_async/test_faceted_search.py create mode 100644 test_elasticsearch/test_dsl/_async/test_index.py create mode 100644 test_elasticsearch/test_dsl/_async/test_mapping.py create mode 100644 test_elasticsearch/test_dsl/_async/test_search.py create mode 100644 test_elasticsearch/test_dsl/_async/test_update_by_query.py create mode 100644 test_elasticsearch/test_dsl/_sync/__init__.py create mode 100644 test_elasticsearch/test_dsl/_sync/test_document.py create mode 100644 test_elasticsearch/test_dsl/_sync/test_faceted_search.py create mode 100644 test_elasticsearch/test_dsl/_sync/test_index.py create mode 100644 test_elasticsearch/test_dsl/_sync/test_mapping.py create mode 100644 test_elasticsearch/test_dsl/_sync/test_search.py create mode 100644 test_elasticsearch/test_dsl/_sync/test_update_by_query.py create mode 100644 test_elasticsearch/test_dsl/async_sleep.py create mode 100644 test_elasticsearch/test_dsl/conftest.py create mode 100644 test_elasticsearch/test_dsl/sleep.py create mode 100644 test_elasticsearch/test_dsl/test_aggs.py create mode 100644 test_elasticsearch/test_dsl/test_analysis.py create mode 100644 test_elasticsearch/test_dsl/test_connections.py create mode 100644 test_elasticsearch/test_dsl/test_field.py create mode 100644 
test_elasticsearch/test_dsl/test_integration/__init__.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_async/__init__.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_async/test_analysis.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_async/test_document.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_async/test_index.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_async/test_mapping.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_async/test_search.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_async/test_update_by_query.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_sync/__init__.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_sync/test_analysis.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_sync/test_document.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_sync/test_index.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_sync/test_mapping.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_sync/test_search.py create mode 100644 test_elasticsearch/test_dsl/test_integration/_sync/test_update_by_query.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_count.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_data.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/__init__.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_async/__init__.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_alias_migration.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_completion.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_composite_aggs.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_parent_child.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_percolate.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_sync/__init__.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_alias_migration.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_completion.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_composite_aggs.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_parent_child.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_percolate.py create mode 100644 test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py create mode 120000 test_elasticsearch/test_dsl/test_integration/test_examples/async_examples create mode 120000 test_elasticsearch/test_dsl/test_integration/test_examples/examples create mode 100644 test_elasticsearch/test_dsl/test_package.py create mode 100644 test_elasticsearch/test_dsl/test_query.py create mode 100644 test_elasticsearch/test_dsl/test_result.py create mode 100644 
test_elasticsearch/test_dsl/test_utils.py create mode 100644 test_elasticsearch/test_dsl/test_validation.py create mode 100644 test_elasticsearch/test_dsl/test_wrappers.py diff --git a/elasticsearch/dsl/__init__.py b/elasticsearch/dsl/__init__.py new file mode 100644 index 000000000..a91e84424 --- /dev/null +++ b/elasticsearch/dsl/__init__.py @@ -0,0 +1,206 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from . import async_connections, connections +from .aggs import A, Agg +from .analysis import analyzer, char_filter, normalizer, token_filter, tokenizer +from .document import AsyncDocument, Document +from .document_base import InnerDoc, M, MetaField, mapped_field +from .exceptions import ( + ElasticsearchDslException, + IllegalOperation, + UnknownDslObject, + ValidationException, +) +from .faceted_search import ( + AsyncFacetedSearch, + DateHistogramFacet, + Facet, + FacetedResponse, + FacetedSearch, + HistogramFacet, + NestedFacet, + RangeFacet, + TermsFacet, +) +from .field import ( + Binary, + Boolean, + Byte, + Completion, + ConstantKeyword, + CustomField, + Date, + DateRange, + DenseVector, + Double, + DoubleRange, + Field, + Float, + FloatRange, + GeoPoint, + GeoShape, + HalfFloat, + Integer, + IntegerRange, + Ip, + IpRange, + Join, + Keyword, + Long, + LongRange, + Murmur3, + Nested, + Object, + Percolator, + Point, + RangeField, + RankFeature, + RankFeatures, + ScaledFloat, + SearchAsYouType, + Shape, + Short, + SparseVector, + Text, + TokenCount, + construct_field, +) +from .function import SF +from .index import ( + AsyncComposableIndexTemplate, + AsyncIndex, + AsyncIndexTemplate, + ComposableIndexTemplate, + Index, + IndexTemplate, +) +from .mapping import AsyncMapping, Mapping +from .query import Q, Query +from .response import AggResponse, Response, UpdateByQueryResponse +from .search import ( + AsyncEmptySearch, + AsyncMultiSearch, + AsyncSearch, + EmptySearch, + MultiSearch, + Search, +) +from .update_by_query import AsyncUpdateByQuery, UpdateByQuery +from .utils import AttrDict, AttrList, DslBase +from .wrappers import Range + +VERSION = (8, 17, 1) +__version__ = VERSION +__versionstr__ = ".".join(map(str, VERSION)) +__all__ = [ + "A", + "Agg", + "AggResponse", + "AsyncComposableIndexTemplate", + "AsyncDocument", + "AsyncEmptySearch", + "AsyncFacetedSearch", + "AsyncIndex", + "AsyncIndexTemplate", + "AsyncMapping", + "AsyncMultiSearch", + "AsyncSearch", + "AsyncUpdateByQuery", + "AttrDict", + "AttrList", + "Binary", + "Boolean", + "Byte", + "Completion", + "ComposableIndexTemplate", + "ConstantKeyword", + "CustomField", + "Date", + "DateHistogramFacet", + "DateRange", + "DenseVector", + "Document", + "Double", + "DoubleRange", + "DslBase", + "ElasticsearchDslException", + "EmptySearch", + "Facet", + "FacetedResponse", + "FacetedSearch", + 
"Field", + "Float", + "FloatRange", + "GeoPoint", + "GeoShape", + "HalfFloat", + "HistogramFacet", + "IllegalOperation", + "Index", + "IndexTemplate", + "InnerDoc", + "Integer", + "IntegerRange", + "Ip", + "IpRange", + "Join", + "Keyword", + "Long", + "LongRange", + "M", + "Mapping", + "MetaField", + "MultiSearch", + "Murmur3", + "Nested", + "NestedFacet", + "Object", + "Percolator", + "Point", + "Q", + "Query", + "Range", + "RangeFacet", + "RangeField", + "RankFeature", + "RankFeatures", + "Response", + "SF", + "ScaledFloat", + "Search", + "SearchAsYouType", + "Shape", + "Short", + "SparseVector", + "TermsFacet", + "Text", + "TokenCount", + "UnknownDslObject", + "UpdateByQuery", + "UpdateByQueryResponse", + "ValidationException", + "analyzer", + "async_connections", + "char_filter", + "connections", + "construct_field", + "mapped_field", + "normalizer", + "token_filter", + "tokenizer", +] diff --git a/elasticsearch/dsl/_async/__init__.py b/elasticsearch/dsl/_async/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/elasticsearch/dsl/_async/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/elasticsearch/dsl/_async/document.py b/elasticsearch/dsl/_async/document.py new file mode 100644 index 000000000..3f5d69f11 --- /dev/null +++ b/elasticsearch/dsl/_async/document.py @@ -0,0 +1,521 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import collections.abc +from typing import ( + TYPE_CHECKING, + Any, + AsyncIterable, + Dict, + List, + Optional, + Tuple, + Union, + cast, +) + +from elasticsearch.exceptions import NotFoundError, RequestError +from elasticsearch.helpers import async_bulk +from typing_extensions import Self, dataclass_transform + +from .._async.index import AsyncIndex +from ..async_connections import get_connection +from ..document_base import DocumentBase, DocumentMeta, mapped_field +from ..exceptions import IllegalOperation +from ..utils import DOC_META_FIELDS, META_FIELDS, AsyncUsingType, merge +from .search import AsyncSearch + +if TYPE_CHECKING: + from elasticsearch import AsyncElasticsearch + + +class AsyncIndexMeta(DocumentMeta): + _index: AsyncIndex + + # global flag to guard us from associating an Index with the base Document + # class, only user defined subclasses should have an _index attr + _document_initialized = False + + def __new__( + cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any] + ) -> "AsyncIndexMeta": + new_cls = super().__new__(cls, name, bases, attrs) + if cls._document_initialized: + index_opts = attrs.pop("Index", None) + index = cls.construct_index(index_opts, bases) + new_cls._index = index + index.document(new_cls) + cls._document_initialized = True + return cast(AsyncIndexMeta, new_cls) + + @classmethod + def construct_index( + cls, opts: Dict[str, Any], bases: Tuple[type, ...] + ) -> AsyncIndex: + if opts is None: + for b in bases: + if hasattr(b, "_index"): + return b._index + + # Set None as Index name so it will set _all while making the query + return AsyncIndex(name=None) + + i = AsyncIndex( + getattr(opts, "name", "*"), using=getattr(opts, "using", "default") + ) + i.settings(**getattr(opts, "settings", {})) + i.aliases(**getattr(opts, "aliases", {})) + for a in getattr(opts, "analyzers", ()): + i.analyzer(a) + return i + + +@dataclass_transform(field_specifiers=(mapped_field,)) +class AsyncDocument(DocumentBase, metaclass=AsyncIndexMeta): + """ + Model-like class for persisting documents in elasticsearch. + """ + + if TYPE_CHECKING: + _index: AsyncIndex + + @classmethod + def _get_using(cls, using: Optional[AsyncUsingType] = None) -> AsyncUsingType: + return cast(AsyncUsingType, using or cls._index._using) + + @classmethod + def _get_connection( + cls, using: Optional[AsyncUsingType] = None + ) -> "AsyncElasticsearch": + return get_connection(cls._get_using(using)) + + @classmethod + async def init( + cls, index: Optional[str] = None, using: Optional[AsyncUsingType] = None + ) -> None: + """ + Create the index and populate the mappings in elasticsearch. + """ + i = cls._index + if index: + i = i.clone(name=index) + await i.save(using=using) + + @classmethod + def search( + cls, using: Optional[AsyncUsingType] = None, index: Optional[str] = None + ) -> AsyncSearch[Self]: + """ + Create an :class:`~elasticsearch.dsl.Search` instance that will search + over this ``Document``. + """ + return AsyncSearch( + using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls] + ) + + @classmethod + async def get( + cls, + id: str, + using: Optional[AsyncUsingType] = None, + index: Optional[str] = None, + **kwargs: Any, + ) -> Optional[Self]: + """ + Retrieve a single document from elasticsearch using its ``id``. + + :arg id: ``id`` of the document to be retrieved + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. 
+ :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.get`` unchanged. + """ + es = cls._get_connection(using) + doc = await es.get(index=cls._default_index(index), id=id, **kwargs) + if not doc.get("found", False): + return None + return cls.from_es(doc) + + @classmethod + async def exists( + cls, + id: str, + using: Optional[AsyncUsingType] = None, + index: Optional[str] = None, + **kwargs: Any, + ) -> bool: + """ + check if exists a single document from elasticsearch using its ``id``. + + :arg id: ``id`` of the document to check if exists + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.exists`` unchanged. + """ + es = cls._get_connection(using) + return bool(await es.exists(index=cls._default_index(index), id=id, **kwargs)) + + @classmethod + async def mget( + cls, + docs: List[Dict[str, Any]], + using: Optional[AsyncUsingType] = None, + index: Optional[str] = None, + raise_on_error: bool = True, + missing: str = "none", + **kwargs: Any, + ) -> List[Optional[Self]]: + r""" + Retrieve multiple document by their ``id``\s. Returns a list of instances + in the same order as requested. + + :arg docs: list of ``id``\s of the documents to be retrieved or a list + of document specifications as per + https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg missing: what to do when one of the documents requested is not + found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise + ``NotFoundError``) or ``'skip'`` (ignore the missing document). + + Any additional keyword arguments will be passed to + ``Elasticsearch.mget`` unchanged. + """ + if missing not in ("raise", "skip", "none"): + raise ValueError("'missing' must be 'raise', 'skip', or 'none'.") + es = cls._get_connection(using) + body = { + "docs": [ + doc if isinstance(doc, collections.abc.Mapping) else {"_id": doc} + for doc in docs + ] + } + results = await es.mget(index=cls._default_index(index), body=body, **kwargs) + + objs: List[Optional[Self]] = [] + error_docs: List[Self] = [] + missing_docs: List[Self] = [] + for doc in results["docs"]: + if doc.get("found"): + if error_docs or missing_docs: + # We're going to raise an exception anyway, so avoid an + # expensive call to cls.from_es(). + continue + + objs.append(cls.from_es(doc)) + + elif doc.get("error"): + if raise_on_error: + error_docs.append(doc) + if missing == "none": + objs.append(None) + + # The doc didn't cause an error, but the doc also wasn't found. + elif missing == "raise": + missing_docs.append(doc) + elif missing == "none": + objs.append(None) + + if error_docs: + error_ids = [doc["_id"] for doc in error_docs] + message = "Required routing not provided for documents %s." + message %= ", ".join(error_ids) + raise RequestError(400, message, error_docs) # type: ignore + if missing_docs: + missing_ids = [doc["_id"] for doc in missing_docs] + message = f"Documents {', '.join(missing_ids)} not found." 
+ raise NotFoundError(404, message, {"docs": missing_docs}) # type: ignore + return objs + + async def delete( + self, + using: Optional[AsyncUsingType] = None, + index: Optional[str] = None, + **kwargs: Any, + ) -> None: + """ + Delete the instance in elasticsearch. + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.delete`` unchanged. + """ + es = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + i = self._get_index(index) + assert i is not None + + await es.delete(index=i, **doc_meta) + + async def update( + self, + using: Optional[AsyncUsingType] = None, + index: Optional[str] = None, + detect_noop: bool = True, + doc_as_upsert: bool = False, + refresh: bool = False, + retry_on_conflict: Optional[int] = None, + script: Optional[Union[str, Dict[str, Any]]] = None, + script_id: Optional[str] = None, + scripted_upsert: bool = False, + upsert: Optional[Dict[str, Any]] = None, + return_doc_meta: bool = False, + **fields: Any, + ) -> Any: + """ + Partial update of the document, specify fields you wish to update and + both the instance and the document in elasticsearch will be updated:: + + doc = MyDocument(title='Document Title!') + doc.save() + doc.update(title='New Document Title!') + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg detect_noop: Set to ``False`` to disable noop detection. + :arg refresh: Control when the changes made by this request are visible + to search. Set to ``True`` for immediate effect. + :arg retry_on_conflict: In between the get and indexing phases of the + update, it is possible that another process might have already + updated the same document. By default, the update will fail with a + version conflict exception. The retry_on_conflict parameter + controls how many times to retry the update before finally throwing + an exception. + :arg doc_as_upsert: Instead of sending a partial doc plus an upsert + doc, setting doc_as_upsert to true will use the contents of doc as + the upsert value + :arg script: the source code of the script as a string, or a dictionary + with script attributes to update. + :arg return_doc_meta: set to ``True`` to return all metadata from the + index API call instead of only the operation result + + :return: operation result noop/updated + """ + body: Dict[str, Any] = { + "doc_as_upsert": doc_as_upsert, + "detect_noop": detect_noop, + } + + # scripted update + if script or script_id: + if upsert is not None: + body["upsert"] = upsert + + if script: + if isinstance(script, str): + script = {"source": script} + else: + script = {"id": script_id} + + if "params" not in script: + script["params"] = fields + else: + script["params"].update(fields) + + body["script"] = script + body["scripted_upsert"] = scripted_upsert + + # partial document update + else: + if not fields: + raise IllegalOperation( + "You cannot call update() without updating individual fields or a script. 
" + "If you wish to update the entire object use save()." + ) + + # update given fields locally + merge(self, fields) + + # prepare data for ES + values = self.to_dict(skip_empty=False) + + # if fields were given: partial update + body["doc"] = {k: values.get(k) for k in fields.keys()} + + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + if retry_on_conflict is not None: + doc_meta["retry_on_conflict"] = retry_on_conflict + + # Optimistic concurrency control + if ( + retry_on_conflict in (None, 0) + and "seq_no" in self.meta + and "primary_term" in self.meta + ): + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + i = self._get_index(index) + assert i is not None + + meta = await self._get_connection(using).update( + index=i, body=body, refresh=refresh, **doc_meta + ) + + # update meta information from ES + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] + + async def save( + self, + using: Optional[AsyncUsingType] = None, + index: Optional[str] = None, + validate: bool = True, + skip_empty: bool = True, + return_doc_meta: bool = False, + **kwargs: Any, + ) -> Any: + """ + Save the document into elasticsearch. If the document doesn't exist it + is created, it is overwritten otherwise. Returns ``True`` if this + operations resulted in new document being created. + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg validate: set to ``False`` to skip validating the document + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in elasticsearch. + :arg return_doc_meta: set to ``True`` to return all metadata from the + update API call instead of only the operation result + + Any additional keyword arguments will be passed to + ``Elasticsearch.index`` unchanged. + + :return: operation result created/updated + """ + if validate: + self.full_clean() + + es = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + i = self._get_index(index) + assert i is not None + + meta = await es.index( + index=i, + body=self.to_dict(skip_empty=skip_empty), + **doc_meta, + ) + # update meta information from ES + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] + + @classmethod + async def bulk( + cls, + actions: AsyncIterable[Union[Self, Dict[str, Any]]], + using: Optional[AsyncUsingType] = None, + index: Optional[str] = None, + validate: bool = True, + skip_empty: bool = True, + **kwargs: Any, + ) -> Tuple[int, Union[int, List[Any]]]: + """ + Allows to perform multiple indexing operations in a single request. + + :arg actions: a generator that returns document instances to be indexed, + bulk operation dictionaries. 
+ :arg using: connection alias to use, defaults to ``'default'`` + :arg index: Elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg validate: set to ``False`` to skip validating the documents + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in Elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.bulk`` unchanged. + + :return: bulk operation results + """ + es = cls._get_connection(using) + + i = cls._default_index(index) + assert i is not None + + class Generate: + def __init__( + self, + doc_iterator: AsyncIterable[Union[AsyncDocument, Dict[str, Any]]], + ): + self.doc_iterator = doc_iterator.__aiter__() + + def __aiter__(self) -> Self: + return self + + async def __anext__(self) -> Dict[str, Any]: + doc: Optional[Union[AsyncDocument, Dict[str, Any]]] = ( + await self.doc_iterator.__anext__() + ) + + if isinstance(doc, dict): + action = doc + doc = None + if "_source" in action and isinstance( + action["_source"], AsyncDocument + ): + doc = action["_source"] + if validate: # pragma: no cover + doc.full_clean() + action["_source"] = doc.to_dict( + include_meta=False, skip_empty=skip_empty + ) + elif doc is not None: + if validate: # pragma: no cover + doc.full_clean() + action = doc.to_dict(include_meta=True, skip_empty=skip_empty) + if "_index" not in action: + action["_index"] = i + return action + + return await async_bulk(es, Generate(actions), **kwargs) diff --git a/elasticsearch/dsl/_async/faceted_search.py b/elasticsearch/dsl/_async/faceted_search.py new file mode 100644 index 000000000..199dcfca1 --- /dev/null +++ b/elasticsearch/dsl/_async/faceted_search.py @@ -0,0 +1,51 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import TYPE_CHECKING + +from ..faceted_search_base import FacetedResponse, FacetedSearchBase + +from ..utils import _R +from .search import AsyncSearch + +if TYPE_CHECKING: + from ..response import Response + + +class AsyncFacetedSearch(FacetedSearchBase[_R]): + _s: AsyncSearch[_R] + + async def count(self) -> int: + return await self._s.count() + + def search(self) -> AsyncSearch[_R]: + """ + Returns the base Search object to which the facets are added. + + You can customize the query by overriding this method and returning a + modified search object. + """ + s = AsyncSearch[_R](doc_type=self.doc_types, index=self.index, using=self.using) + return s.response_class(FacetedResponse) + + async def execute(self) -> "Response[_R]": + """ + Execute the search and return the response. 
+ """ + r = await self._s.execute() + r._faceted_search = self + return r diff --git a/elasticsearch/dsl/_async/index.py b/elasticsearch/dsl/_async/index.py new file mode 100644 index 000000000..71542dffd --- /dev/null +++ b/elasticsearch/dsl/_async/index.py @@ -0,0 +1,638 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import TYPE_CHECKING, Any, Dict, Optional + +from typing_extensions import Self + +from ..async_connections import get_connection +from ..exceptions import IllegalOperation +from ..index_base import IndexBase +from ..utils import AsyncUsingType +from .mapping import AsyncMapping +from .search import AsyncSearch +from .update_by_query import AsyncUpdateByQuery + +if TYPE_CHECKING: + from elastic_transport import ObjectApiResponse + from elasticsearch import AsyncElasticsearch + + +class AsyncIndexTemplate: + def __init__( + self, + name: str, + template: str, + index: Optional["AsyncIndex"] = None, + order: Optional[int] = None, + **kwargs: Any, + ): + if index is None: + self._index = AsyncIndex(template, **kwargs) + else: + if kwargs: + raise ValueError( + "You cannot specify options for Index when" + " passing an Index instance." + ) + self._index = index.clone() + self._index._name = template + self._template_name = name + self.order = order + + def __getattr__(self, attr_name: str) -> Any: + return getattr(self._index, attr_name) + + def to_dict(self) -> Dict[str, Any]: + d = self._index.to_dict() + d["index_patterns"] = [self._index._name] + if self.order is not None: + d["order"] = self.order + return d + + async def save( + self, using: Optional[AsyncUsingType] = None + ) -> "ObjectApiResponse[Any]": + es = get_connection(using or self._index._using) + return await es.indices.put_template( + name=self._template_name, body=self.to_dict() + ) + + +class AsyncComposableIndexTemplate: + def __init__( + self, + name: str, + template: str, + index: Optional["AsyncIndex"] = None, + priority: Optional[int] = None, + **kwargs: Any, + ): + if index is None: + self._index = AsyncIndex(template, **kwargs) + else: + if kwargs: + raise ValueError( + "You cannot specify options for Index when" + " passing an Index instance." 
+ ) + self._index = index.clone() + self._index._name = template + self._template_name = name + self.priority = priority + + def __getattr__(self, attr_name: str) -> Any: + return getattr(self._index, attr_name) + + def to_dict(self) -> Dict[str, Any]: + d: Dict[str, Any] = {"template": self._index.to_dict()} + d["index_patterns"] = [self._index._name] + if self.priority is not None: + d["priority"] = self.priority + return d + + async def save( + self, using: Optional[AsyncUsingType] = None + ) -> "ObjectApiResponse[Any]": + es = get_connection(using or self._index._using) + return await es.indices.put_index_template( + name=self._template_name, **self.to_dict() + ) + + +class AsyncIndex(IndexBase): + _using: AsyncUsingType + + if TYPE_CHECKING: + + def get_or_create_mapping(self) -> AsyncMapping: ... + + def __init__(self, name: str, using: AsyncUsingType = "default"): + """ + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + super().__init__(name, AsyncMapping, using=using) + + def _get_connection( + self, using: Optional[AsyncUsingType] = None + ) -> "AsyncElasticsearch": + if self._name is None: + raise ValueError("You cannot perform API calls on the default index.") + return get_connection(using or self._using) + + connection = property(_get_connection) + + def as_template( + self, + template_name: str, + pattern: Optional[str] = None, + order: Optional[int] = None, + ) -> AsyncIndexTemplate: + return AsyncIndexTemplate( + template_name, pattern or self._name, index=self, order=order + ) + + def as_composable_template( + self, + template_name: str, + pattern: Optional[str] = None, + priority: Optional[int] = None, + ) -> AsyncComposableIndexTemplate: + return AsyncComposableIndexTemplate( + template_name, pattern or self._name, index=self, priority=priority + ) + + async def load_mappings(self, using: Optional[AsyncUsingType] = None) -> None: + await self.get_or_create_mapping().update_from_es( + self._name, using=using or self._using + ) + + def clone( + self, name: Optional[str] = None, using: Optional[AsyncUsingType] = None + ) -> Self: + """ + Create a copy of the instance with another name or connection alias. + Useful for creating multiple indices with shared configuration:: + + i = Index('base-index') + i.settings(number_of_shards=1) + i.create() + + i2 = i.clone('other-index') + i2.create() + + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + i = self.__class__(name or self._name, using=using or self._using) + i._settings = self._settings.copy() + i._aliases = self._aliases.copy() + i._analysis = self._analysis.copy() + i._doc_types = self._doc_types[:] + if self._mapping is not None: + i._mapping = self._mapping._clone() + return i + + def search(self, using: Optional[AsyncUsingType] = None) -> AsyncSearch: + """ + Return a :class:`~elasticsearch.dsl.Search` object searching over the + index (or all the indices belonging to this template) and its + ``Document``\\s. + """ + return AsyncSearch( + using=using or self._using, index=self._name, doc_type=self._doc_types + ) + + def updateByQuery( + self, using: Optional[AsyncUsingType] = None + ) -> AsyncUpdateByQuery: + """ + Return a :class:`~elasticsearch.dsl.UpdateByQuery` object searching over the index + (or all the indices belonging to this template) and updating Documents that match + the search criteria. 
+ + For more information, see here: + https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html + """ + return AsyncUpdateByQuery( + using=using or self._using, + index=self._name, + ) + + async def create( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Creates the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.create`` unchanged. + """ + return await self._get_connection(using).indices.create( + index=self._name, body=self.to_dict(), **kwargs + ) + + async def is_closed(self, using: Optional[AsyncUsingType] = None) -> bool: + state = await self._get_connection(using).cluster.state( + index=self._name, metric="metadata" + ) + return bool(state["metadata"]["indices"][self._name]["state"] == "close") + + async def save( + self, using: Optional[AsyncUsingType] = None + ) -> "Optional[ObjectApiResponse[Any]]": + """ + Sync the index definition with elasticsearch, creating the index if it + doesn't exist and updating its settings and mappings if it does. + + Note some settings and mapping changes cannot be done on an open + index (or at all on an existing index) and for those this method will + fail with the underlying exception. + """ + if not await self.exists(using=using): + return await self.create(using=using) + + body = self.to_dict() + settings = body.pop("settings", {}) + analysis = settings.pop("analysis", None) + current_settings = (await self.get_settings(using=using))[self._name][ + "settings" + ]["index"] + if analysis: + if await self.is_closed(using=using): + # closed index, update away + settings["analysis"] = analysis + else: + # compare analysis definition, if all analysis objects are + # already defined as requested, skip analysis update and + # proceed, otherwise raise IllegalOperation + existing_analysis = current_settings.get("analysis", {}) + if any( + existing_analysis.get(section, {}).get(k, None) + != analysis[section][k] + for section in analysis + for k in analysis[section] + ): + raise IllegalOperation( + "You cannot update analysis configuration on an open index, " + "you need to close index %s first." % self._name + ) + + # try and update the settings + if settings: + settings = settings.copy() + for k, v in list(settings.items()): + if k in current_settings and current_settings[k] == str(v): + del settings[k] + + if settings: + await self.put_settings(using=using, body=settings) + + # update the mappings, any conflict in the mappings will result in an + # exception + mappings = body.pop("mappings", {}) + if mappings: + return await self.put_mapping(using=using, body=mappings) + + return None + + async def analyze( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Perform the analysis process on a text and return the tokens breakdown + of the text. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.analyze`` unchanged. + """ + return await self._get_connection(using).indices.analyze( + index=self._name, **kwargs + ) + + async def refresh( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Performs a refresh operation on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.refresh`` unchanged. 
+ """ + return await self._get_connection(using).indices.refresh( + index=self._name, **kwargs + ) + + async def flush( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Performs a flush operation on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.flush`` unchanged. + """ + return await self._get_connection(using).indices.flush( + index=self._name, **kwargs + ) + + async def get( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The get index API allows to retrieve information about the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get`` unchanged. + """ + return await self._get_connection(using).indices.get(index=self._name, **kwargs) + + async def open( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Opens the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.open`` unchanged. + """ + return await self._get_connection(using).indices.open( + index=self._name, **kwargs + ) + + async def close( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Closes the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.close`` unchanged. + """ + return await self._get_connection(using).indices.close( + index=self._name, **kwargs + ) + + async def delete( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Deletes the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.delete`` unchanged. + """ + return await self._get_connection(using).indices.delete( + index=self._name, **kwargs + ) + + async def exists( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> bool: + """ + Returns ``True`` if the index already exists in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists`` unchanged. + """ + return bool( + await self._get_connection(using).indices.exists(index=self._name, **kwargs) + ) + + async def put_mapping( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Register specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_mapping`` unchanged. + """ + return await self._get_connection(using).indices.put_mapping( + index=self._name, **kwargs + ) + + async def get_mapping( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_mapping`` unchanged. + """ + return await self._get_connection(using).indices.get_mapping( + index=self._name, **kwargs + ) + + async def get_field_mapping( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve mapping definition of a specific field. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_field_mapping`` unchanged. 
+ """ + return await self._get_connection(using).indices.get_field_mapping( + index=self._name, **kwargs + ) + + async def put_alias( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Create an alias for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_alias`` unchanged. + """ + return await self._get_connection(using).indices.put_alias( + index=self._name, **kwargs + ) + + async def exists_alias( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> bool: + """ + Return a boolean indicating whether given alias exists for this index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists_alias`` unchanged. + """ + return bool( + await self._get_connection(using).indices.exists_alias( + index=self._name, **kwargs + ) + ) + + async def get_alias( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve a specified alias. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_alias`` unchanged. + """ + return await self._get_connection(using).indices.get_alias( + index=self._name, **kwargs + ) + + async def delete_alias( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Delete specific alias. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.delete_alias`` unchanged. + """ + return await self._get_connection(using).indices.delete_alias( + index=self._name, **kwargs + ) + + async def get_settings( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve settings for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_settings`` unchanged. + """ + return await self._get_connection(using).indices.get_settings( + index=self._name, **kwargs + ) + + async def put_settings( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Change specific index level settings in real time. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_settings`` unchanged. + """ + return await self._get_connection(using).indices.put_settings( + index=self._name, **kwargs + ) + + async def stats( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve statistics on different operations happening on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.stats`` unchanged. + """ + return await self._get_connection(using).indices.stats( + index=self._name, **kwargs + ) + + async def segments( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Provide low level segments information that a Lucene index (shard + level) is built with. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.segments`` unchanged. + """ + return await self._get_connection(using).indices.segments( + index=self._name, **kwargs + ) + + async def validate_query( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Validate a potentially expensive query without executing it. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.validate_query`` unchanged. 
+ """ + return await self._get_connection(using).indices.validate_query( + index=self._name, **kwargs + ) + + async def clear_cache( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Clear all caches or specific cached associated with the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.clear_cache`` unchanged. + """ + return await self._get_connection(using).indices.clear_cache( + index=self._name, **kwargs + ) + + async def recovery( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The indices recovery API provides insight into on-going shard + recoveries for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.recovery`` unchanged. + """ + return await self._get_connection(using).indices.recovery( + index=self._name, **kwargs + ) + + async def shard_stores( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Provides store information for shard copies of the index. Store + information reports on which nodes shard copies exist, the shard copy + version, indicating how recent they are, and any exceptions encountered + while opening the shard index or from earlier engine failure. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.shard_stores`` unchanged. + """ + return await self._get_connection(using).indices.shard_stores( + index=self._name, **kwargs + ) + + async def forcemerge( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The force merge API allows to force merging of the index through an + API. The merge relates to the number of segments a Lucene index holds + within each shard. The force merge operation allows to reduce the + number of segments by merging them. + + This call will block until the merge is complete. If the http + connection is lost, the request will continue in the background, and + any new requests will block until the previous force merge is complete. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.forcemerge`` unchanged. + """ + return await self._get_connection(using).indices.forcemerge( + index=self._name, **kwargs + ) + + async def shrink( + self, using: Optional[AsyncUsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The shrink index API allows you to shrink an existing index into a new + index with fewer primary shards. The number of primary shards in the + target index must be a factor of the shards in the source index. For + example an index with 8 primary shards can be shrunk into 4, 2 or 1 + primary shards or an index with 15 primary shards can be shrunk into 5, + 3 or 1. If the number of shards in the index is a prime number it can + only be shrunk into a single primary shard. Before shrinking, a + (primary or replica) copy of every shard in the index must be present + on the same node. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.shrink`` unchanged. + """ + return await self._get_connection(using).indices.shrink( + index=self._name, **kwargs + ) diff --git a/elasticsearch/dsl/_async/mapping.py b/elasticsearch/dsl/_async/mapping.py new file mode 100644 index 000000000..7ef9c6dac --- /dev/null +++ b/elasticsearch/dsl/_async/mapping.py @@ -0,0 +1,49 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Optional, Union + +from typing_extensions import Self + +from ..async_connections import get_connection +from ..mapping_base import MappingBase +from ..utils import AsyncUsingType + + +class AsyncMapping(MappingBase): + @classmethod + async def from_es( + cls, index: Optional[Union[str, List[str]]], using: AsyncUsingType = "default" + ) -> Self: + m = cls() + await m.update_from_es(index, using) + return m + + async def update_from_es( + self, index: Optional[Union[str, List[str]]], using: AsyncUsingType = "default" + ) -> None: + es = get_connection(using) + raw = await es.indices.get_mapping(index=index) + _, raw = raw.popitem() + self._update_from_dict(raw["mappings"]) + + async def save(self, index: str, using: AsyncUsingType = "default") -> None: + from .index import AsyncIndex + + i = AsyncIndex(index, using=using) + i.mapping(self) + await i.save() diff --git a/elasticsearch/dsl/_async/search.py b/elasticsearch/dsl/_async/search.py new file mode 100644 index 000000000..ea6288622 --- /dev/null +++ b/elasticsearch/dsl/_async/search.py @@ -0,0 +1,232 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import contextlib +from typing import ( + TYPE_CHECKING, + Any, + AsyncIterator, + Dict, + Iterator, + List, + Optional, + cast, +) + +from elasticsearch.exceptions import ApiError +from elasticsearch.helpers import async_scan +from typing_extensions import Self + +from ..async_connections import get_connection +from ..response import Response +from ..search_base import MultiSearchBase, SearchBase +from ..utils import _R, AsyncUsingType, AttrDict + + +class AsyncSearch(SearchBase[_R]): + _using: AsyncUsingType + + def __aiter__(self) -> AsyncIterator[_R]: + """ + Iterate over the hits. 
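``AsyncSearch`` supports ``async for`` iteration as implemented below: the request is sent on the first ``__anext__`` call and the cached hits are yielded afterwards. A rough usage sketch; the index, the field names and the ``query()`` builder inherited from the shared search base are assumptions, not part of this file::

    import asyncio

    from elasticsearch.dsl import AsyncSearch, async_connections

    async def print_titles() -> None:
        async_connections.create_connection(hosts=["http://localhost:9200"])

        # executed lazily: the search request is sent when iteration starts
        s = AsyncSearch(index="blog-posts").query("match", title="python")
        async for hit in s:
            print(hit.meta.id, hit.title)

    asyncio.run(print_titles())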
+ """ + + class ResultsIterator(AsyncIterator[_R]): + def __init__(self, search: AsyncSearch[_R]): + self.search = search + self.iterator: Optional[Iterator[_R]] = None + + async def __anext__(self) -> _R: + if self.iterator is None: + self.iterator = iter(await self.search.execute()) + try: + return next(self.iterator) + except StopIteration: + raise StopAsyncIteration() + + return ResultsIterator(self) + + async def count(self) -> int: + """ + Return the number of hits matching the query and filters. Note that + only the actual number is returned. + """ + if hasattr(self, "_response") and self._response.hits.total.relation == "eq": # type: ignore[attr-defined] + return cast(int, self._response.hits.total.value) # type: ignore[attr-defined] + + es = get_connection(self._using) + + d = self.to_dict(count=True) + # TODO: failed shards detection + resp = await es.count( + index=self._index, + query=cast(Optional[Dict[str, Any]], d.get("query", None)), + **self._params, + ) + + return cast(int, resp["count"]) + + async def execute(self, ignore_cache: bool = False) -> Response[_R]: + """ + Execute the search and return an instance of ``Response`` wrapping all + the data. + + :arg ignore_cache: if set to ``True``, consecutive calls will hit + ES, while cached result will be ignored. Defaults to `False` + """ + if ignore_cache or not hasattr(self, "_response"): + es = get_connection(self._using) + + self._response = self._response_class( + self, + ( + await es.search( + index=self._index, body=self.to_dict(), **self._params + ) + ).body, + ) + return self._response + + async def scan(self) -> AsyncIterator[_R]: + """ + Turn the search into a scan search and return a generator that will + iterate over all the documents matching the query. + + Use ``params`` method to specify any additional arguments you with to + pass to the underlying ``scan`` helper from ``elasticsearch-py`` - + https://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.scan + + The ``iterate()`` method should be preferred, as it provides similar + functionality using an Elasticsearch point in time. + """ + es = get_connection(self._using) + + async for hit in async_scan( + es, query=self.to_dict(), index=self._index, **self._params + ): + yield self._get_result(cast(AttrDict[Any], hit)) + + async def delete(self) -> AttrDict[Any]: + """ + delete() executes the query by delegating to delete_by_query() + """ + + es = get_connection(self._using) + assert self._index is not None + + return AttrDict( + cast( + Dict[str, Any], + await es.delete_by_query( + index=self._index, body=self.to_dict(), **self._params + ), + ) + ) + + @contextlib.asynccontextmanager + async def point_in_time(self, keep_alive: str = "1m") -> AsyncIterator[Self]: + """ + Open a point in time (pit) that can be used across several searches. + + This method implements a context manager that returns a search object + configured to operate within the created pit. + + :arg keep_alive: the time to live for the point in time, renewed with each search request + """ + es = get_connection(self._using) + + pit = await es.open_point_in_time( + index=self._index or "*", keep_alive=keep_alive + ) + search = self.index().extra(pit={"id": pit["id"], "keep_alive": keep_alive}) + if not search._sort: + search = search.sort("_shard_doc") + yield search + await es.close_point_in_time(id=pit["id"]) + + async def iterate(self, keep_alive: str = "1m") -> AsyncIterator[_R]: + """ + Return a generator that iterates over all the documents matching the query. 
+ + This method uses a point in time to provide consistent results even when + the index is changing. It should be preferred over ``scan()``. + + :arg keep_alive: the time to live for the point in time, renewed with each new search request + """ + async with self.point_in_time(keep_alive=keep_alive) as s: + while True: + r = await s.execute() + for hit in r: + yield hit + if len(r.hits) == 0: + break + s = s.search_after() + + +class AsyncMultiSearch(MultiSearchBase[_R]): + """ + Combine multiple :class:`~elasticsearch.dsl.Search` objects into a single + request. + """ + + _using: AsyncUsingType + + if TYPE_CHECKING: + + def add(self, search: AsyncSearch[_R]) -> Self: ... # type: ignore[override] + + async def execute( + self, ignore_cache: bool = False, raise_on_error: bool = True + ) -> List[Response[_R]]: + """ + Execute the multi search request and return a list of search results. + """ + if ignore_cache or not hasattr(self, "_response"): + es = get_connection(self._using) + + responses = await es.msearch( + index=self._index, body=self.to_dict(), **self._params + ) + + out: List[Response[_R]] = [] + for s, r in zip(self._searches, responses["responses"]): + if r.get("error", False): + if raise_on_error: + raise ApiError("N/A", meta=responses.meta, body=r) + r = None + else: + r = Response(s, r) + out.append(r) + + self._response = out + + return self._response + + +class AsyncEmptySearch(AsyncSearch[_R]): + async def count(self) -> int: + return 0 + + async def execute(self, ignore_cache: bool = False) -> Response[_R]: + return self._response_class(self, {"hits": {"total": 0, "hits": []}}) + + async def scan(self) -> AsyncIterator[_R]: + return + yield # a bit strange, but this forces an empty generator function + + async def delete(self) -> AttrDict[Any]: + return AttrDict[Any]({}) diff --git a/elasticsearch/dsl/_async/update_by_query.py b/elasticsearch/dsl/_async/update_by_query.py new file mode 100644 index 000000000..bff3aa947 --- /dev/null +++ b/elasticsearch/dsl/_async/update_by_query.py @@ -0,0 +1,47 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import TYPE_CHECKING + +from ..async_connections import get_connection +from ..update_by_query_base import UpdateByQueryBase +from ..utils import _R, AsyncUsingType + +if TYPE_CHECKING: + from ..response import UpdateByQueryResponse + + +class AsyncUpdateByQuery(UpdateByQueryBase[_R]): + _using: AsyncUsingType + + async def execute(self) -> "UpdateByQueryResponse[_R]": + """ + Execute the search and return an instance of ``Response`` wrapping all + the data. 
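The ``point_in_time()`` and ``iterate()`` helpers shown above pair a point in time with ``search_after`` so results stay consistent while the index changes. A sketch of consuming ``iterate()``; the connection setup and index name are assumptions::

    import asyncio

    from elasticsearch.dsl import AsyncSearch, async_connections

    async def dump_all_documents() -> None:
        async_connections.create_connection(hosts=["http://localhost:9200"])

        s = AsyncSearch(index="blog-posts")

        # preferred over scan(): pages through all hits inside a point in time
        async for hit in s.iterate(keep_alive="2m"):
            print(hit.meta.id)

    asyncio.run(dump_all_documents())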
+ """ + es = get_connection(self._using) + assert self._index is not None + + self._response = self._response_class( + self, + ( + await es.update_by_query( + index=self._index, **self.to_dict(), **self._params + ) + ).body, + ) + return self._response diff --git a/elasticsearch/dsl/_sync/__init__.py b/elasticsearch/dsl/_sync/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/elasticsearch/dsl/_sync/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/elasticsearch/dsl/_sync/document.py b/elasticsearch/dsl/_sync/document.py new file mode 100644 index 000000000..c8143412f --- /dev/null +++ b/elasticsearch/dsl/_sync/document.py @@ -0,0 +1,513 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import collections.abc +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Optional, + Tuple, + Union, + cast, +) + +from elasticsearch.exceptions import NotFoundError, RequestError +from elasticsearch.helpers import bulk +from typing_extensions import Self, dataclass_transform + +from .._sync.index import Index +from ..connections import get_connection +from ..document_base import DocumentBase, DocumentMeta, mapped_field +from ..exceptions import IllegalOperation +from ..utils import DOC_META_FIELDS, META_FIELDS, UsingType, merge +from .search import Search + +if TYPE_CHECKING: + from elasticsearch import Elasticsearch + + +class IndexMeta(DocumentMeta): + _index: Index + + # global flag to guard us from associating an Index with the base Document + # class, only user defined subclasses should have an _index attr + _document_initialized = False + + def __new__( + cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any] + ) -> "IndexMeta": + new_cls = super().__new__(cls, name, bases, attrs) + if cls._document_initialized: + index_opts = attrs.pop("Index", None) + index = cls.construct_index(index_opts, bases) + new_cls._index = index + index.document(new_cls) + cls._document_initialized = True + return cast(IndexMeta, new_cls) + + @classmethod + def construct_index(cls, opts: Dict[str, Any], bases: Tuple[type, ...]) -> Index: + if opts is None: + for b in bases: + if hasattr(b, "_index"): + return b._index + + # Set None as Index name so it will set _all while making the query + return Index(name=None) + + i = Index(getattr(opts, "name", "*"), using=getattr(opts, "using", "default")) + i.settings(**getattr(opts, "settings", {})) + i.aliases(**getattr(opts, "aliases", {})) + for a in getattr(opts, "analyzers", ()): + i.analyzer(a) + return i + + +@dataclass_transform(field_specifiers=(mapped_field,)) +class Document(DocumentBase, metaclass=IndexMeta): + """ + Model-like class for persisting documents in elasticsearch. + """ + + if TYPE_CHECKING: + _index: Index + + @classmethod + def _get_using(cls, using: Optional[UsingType] = None) -> UsingType: + return cast(UsingType, using or cls._index._using) + + @classmethod + def _get_connection(cls, using: Optional[UsingType] = None) -> "Elasticsearch": + return get_connection(cls._get_using(using)) + + @classmethod + def init( + cls, index: Optional[str] = None, using: Optional[UsingType] = None + ) -> None: + """ + Create the index and populate the mappings in elasticsearch. + """ + i = cls._index + if index: + i = i.clone(name=index) + i.save(using=using) + + @classmethod + def search( + cls, using: Optional[UsingType] = None, index: Optional[str] = None + ) -> Search[Self]: + """ + Create an :class:`~elasticsearch.dsl.Search` instance that will search + over this ``Document``. + """ + return Search( + using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls] + ) + + @classmethod + def get( + cls, + id: str, + using: Optional[UsingType] = None, + index: Optional[str] = None, + **kwargs: Any, + ) -> Optional[Self]: + """ + Retrieve a single document from elasticsearch using its ``id``. + + :arg id: ``id`` of the document to be retrieved + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.get`` unchanged. 
+ """ + es = cls._get_connection(using) + doc = es.get(index=cls._default_index(index), id=id, **kwargs) + if not doc.get("found", False): + return None + return cls.from_es(doc) + + @classmethod + def exists( + cls, + id: str, + using: Optional[UsingType] = None, + index: Optional[str] = None, + **kwargs: Any, + ) -> bool: + """ + check if exists a single document from elasticsearch using its ``id``. + + :arg id: ``id`` of the document to check if exists + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.exists`` unchanged. + """ + es = cls._get_connection(using) + return bool(es.exists(index=cls._default_index(index), id=id, **kwargs)) + + @classmethod + def mget( + cls, + docs: List[Dict[str, Any]], + using: Optional[UsingType] = None, + index: Optional[str] = None, + raise_on_error: bool = True, + missing: str = "none", + **kwargs: Any, + ) -> List[Optional[Self]]: + r""" + Retrieve multiple document by their ``id``\s. Returns a list of instances + in the same order as requested. + + :arg docs: list of ``id``\s of the documents to be retrieved or a list + of document specifications as per + https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg missing: what to do when one of the documents requested is not + found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise + ``NotFoundError``) or ``'skip'`` (ignore the missing document). + + Any additional keyword arguments will be passed to + ``Elasticsearch.mget`` unchanged. + """ + if missing not in ("raise", "skip", "none"): + raise ValueError("'missing' must be 'raise', 'skip', or 'none'.") + es = cls._get_connection(using) + body = { + "docs": [ + doc if isinstance(doc, collections.abc.Mapping) else {"_id": doc} + for doc in docs + ] + } + results = es.mget(index=cls._default_index(index), body=body, **kwargs) + + objs: List[Optional[Self]] = [] + error_docs: List[Self] = [] + missing_docs: List[Self] = [] + for doc in results["docs"]: + if doc.get("found"): + if error_docs or missing_docs: + # We're going to raise an exception anyway, so avoid an + # expensive call to cls.from_es(). + continue + + objs.append(cls.from_es(doc)) + + elif doc.get("error"): + if raise_on_error: + error_docs.append(doc) + if missing == "none": + objs.append(None) + + # The doc didn't cause an error, but the doc also wasn't found. + elif missing == "raise": + missing_docs.append(doc) + elif missing == "none": + objs.append(None) + + if error_docs: + error_ids = [doc["_id"] for doc in error_docs] + message = "Required routing not provided for documents %s." + message %= ", ".join(error_ids) + raise RequestError(400, message, error_docs) # type: ignore + if missing_docs: + missing_ids = [doc["_id"] for doc in missing_docs] + message = f"Documents {', '.join(missing_ids)} not found." + raise NotFoundError(404, message, {"docs": missing_docs}) # type: ignore + return objs + + def delete( + self, + using: Optional[UsingType] = None, + index: Optional[str] = None, + **kwargs: Any, + ) -> None: + """ + Delete the instance in elasticsearch. 
+ + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.delete`` unchanged. + """ + es = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + i = self._get_index(index) + assert i is not None + + es.delete(index=i, **doc_meta) + + def update( + self, + using: Optional[UsingType] = None, + index: Optional[str] = None, + detect_noop: bool = True, + doc_as_upsert: bool = False, + refresh: bool = False, + retry_on_conflict: Optional[int] = None, + script: Optional[Union[str, Dict[str, Any]]] = None, + script_id: Optional[str] = None, + scripted_upsert: bool = False, + upsert: Optional[Dict[str, Any]] = None, + return_doc_meta: bool = False, + **fields: Any, + ) -> Any: + """ + Partial update of the document, specify fields you wish to update and + both the instance and the document in elasticsearch will be updated:: + + doc = MyDocument(title='Document Title!') + doc.save() + doc.update(title='New Document Title!') + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg detect_noop: Set to ``False`` to disable noop detection. + :arg refresh: Control when the changes made by this request are visible + to search. Set to ``True`` for immediate effect. + :arg retry_on_conflict: In between the get and indexing phases of the + update, it is possible that another process might have already + updated the same document. By default, the update will fail with a + version conflict exception. The retry_on_conflict parameter + controls how many times to retry the update before finally throwing + an exception. + :arg doc_as_upsert: Instead of sending a partial doc plus an upsert + doc, setting doc_as_upsert to true will use the contents of doc as + the upsert value + :arg script: the source code of the script as a string, or a dictionary + with script attributes to update. + :arg return_doc_meta: set to ``True`` to return all metadata from the + index API call instead of only the operation result + + :return: operation result noop/updated + """ + body: Dict[str, Any] = { + "doc_as_upsert": doc_as_upsert, + "detect_noop": detect_noop, + } + + # scripted update + if script or script_id: + if upsert is not None: + body["upsert"] = upsert + + if script: + if isinstance(script, str): + script = {"source": script} + else: + script = {"id": script_id} + + if "params" not in script: + script["params"] = fields + else: + script["params"].update(fields) + + body["script"] = script + body["scripted_upsert"] = scripted_upsert + + # partial document update + else: + if not fields: + raise IllegalOperation( + "You cannot call update() without updating individual fields or a script. " + "If you wish to update the entire object use save()." 
+ ) + + # update given fields locally + merge(self, fields) + + # prepare data for ES + values = self.to_dict(skip_empty=False) + + # if fields were given: partial update + body["doc"] = {k: values.get(k) for k in fields.keys()} + + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + if retry_on_conflict is not None: + doc_meta["retry_on_conflict"] = retry_on_conflict + + # Optimistic concurrency control + if ( + retry_on_conflict in (None, 0) + and "seq_no" in self.meta + and "primary_term" in self.meta + ): + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + i = self._get_index(index) + assert i is not None + + meta = self._get_connection(using).update( + index=i, body=body, refresh=refresh, **doc_meta + ) + + # update meta information from ES + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] + + def save( + self, + using: Optional[UsingType] = None, + index: Optional[str] = None, + validate: bool = True, + skip_empty: bool = True, + return_doc_meta: bool = False, + **kwargs: Any, + ) -> Any: + """ + Save the document into elasticsearch. If the document doesn't exist it + is created, it is overwritten otherwise. Returns ``True`` if this + operations resulted in new document being created. + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg validate: set to ``False`` to skip validating the document + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in elasticsearch. + :arg return_doc_meta: set to ``True`` to return all metadata from the + update API call instead of only the operation result + + Any additional keyword arguments will be passed to + ``Elasticsearch.index`` unchanged. + + :return: operation result created/updated + """ + if validate: + self.full_clean() + + es = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + i = self._get_index(index) + assert i is not None + + meta = es.index( + index=i, + body=self.to_dict(skip_empty=skip_empty), + **doc_meta, + ) + # update meta information from ES + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] + + @classmethod + def bulk( + cls, + actions: Iterable[Union[Self, Dict[str, Any]]], + using: Optional[UsingType] = None, + index: Optional[str] = None, + validate: bool = True, + skip_empty: bool = True, + **kwargs: Any, + ) -> Tuple[int, Union[int, List[Any]]]: + """ + Allows to perform multiple indexing operations in a single request. + + :arg actions: a generator that returns document instances to be indexed, + bulk operation dictionaries. + :arg using: connection alias to use, defaults to ``'default'`` + :arg index: Elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. 
+ :arg validate: set to ``False`` to skip validating the documents + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in Elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.bulk`` unchanged. + + :return: bulk operation results + """ + es = cls._get_connection(using) + + i = cls._default_index(index) + assert i is not None + + class Generate: + def __init__( + self, + doc_iterator: Iterable[Union[Document, Dict[str, Any]]], + ): + self.doc_iterator = doc_iterator.__iter__() + + def __iter__(self) -> Self: + return self + + def __next__(self) -> Dict[str, Any]: + doc: Optional[Union[Document, Dict[str, Any]]] = ( + self.doc_iterator.__next__() + ) + + if isinstance(doc, dict): + action = doc + doc = None + if "_source" in action and isinstance(action["_source"], Document): + doc = action["_source"] + if validate: # pragma: no cover + doc.full_clean() + action["_source"] = doc.to_dict( + include_meta=False, skip_empty=skip_empty + ) + elif doc is not None: + if validate: # pragma: no cover + doc.full_clean() + action = doc.to_dict(include_meta=True, skip_empty=skip_empty) + if "_index" not in action: + action["_index"] = i + return action + + return bulk(es, Generate(actions), **kwargs) diff --git a/elasticsearch/dsl/_sync/faceted_search.py b/elasticsearch/dsl/_sync/faceted_search.py new file mode 100644 index 000000000..115492c7a --- /dev/null +++ b/elasticsearch/dsl/_sync/faceted_search.py @@ -0,0 +1,51 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import TYPE_CHECKING + +from ..faceted_search_base import FacetedResponse, FacetedSearchBase + +from ..utils import _R +from .search import Search + +if TYPE_CHECKING: + from ..response import Response + + +class FacetedSearch(FacetedSearchBase[_R]): + _s: Search[_R] + + def count(self) -> int: + return self._s.count() + + def search(self) -> Search[_R]: + """ + Returns the base Search object to which the facets are added. + + You can customize the query by overriding this method and returning a + modified search object. + """ + s = Search[_R](doc_type=self.doc_types, index=self.index, using=self.using) + return s.response_class(FacetedResponse) + + def execute(self) -> "Response[_R]": + """ + Execute the search and return the response. + """ + r = self._s.execute() + r._faceted_search = self + return r diff --git a/elasticsearch/dsl/_sync/index.py b/elasticsearch/dsl/_sync/index.py new file mode 100644 index 000000000..171f70bfb --- /dev/null +++ b/elasticsearch/dsl/_sync/index.py @@ -0,0 +1,596 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
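The persistence API above (``save()``, ``update()`` and ``bulk()``) is the main write path for documents. A hedged sketch, again using the hypothetical ``BlogPost`` mapping from the earlier example::

    from datetime import datetime

    post = BlogPost(title="Hello world", published=datetime.now())
    post.meta.id = "42"   # explicit id; otherwise Elasticsearch assigns one
    post.save()           # index the document; returns "created" or "updated"

    # partial update: only the given fields are sent, and the local copy is updated too
    post.update(title="Hello again")

    # bulk-index a generator of documents in a single request
    drafts = (BlogPost(title=f"draft {n}") for n in range(3))
    BlogPost.bulk(drafts)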
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import TYPE_CHECKING, Any, Dict, Optional + +from typing_extensions import Self + +from ..connections import get_connection +from ..exceptions import IllegalOperation +from ..index_base import IndexBase +from ..utils import UsingType +from .mapping import Mapping +from .search import Search +from .update_by_query import UpdateByQuery + +if TYPE_CHECKING: + from elastic_transport import ObjectApiResponse + from elasticsearch import Elasticsearch + + +class IndexTemplate: + def __init__( + self, + name: str, + template: str, + index: Optional["Index"] = None, + order: Optional[int] = None, + **kwargs: Any, + ): + if index is None: + self._index = Index(template, **kwargs) + else: + if kwargs: + raise ValueError( + "You cannot specify options for Index when" + " passing an Index instance." + ) + self._index = index.clone() + self._index._name = template + self._template_name = name + self.order = order + + def __getattr__(self, attr_name: str) -> Any: + return getattr(self._index, attr_name) + + def to_dict(self) -> Dict[str, Any]: + d = self._index.to_dict() + d["index_patterns"] = [self._index._name] + if self.order is not None: + d["order"] = self.order + return d + + def save(self, using: Optional[UsingType] = None) -> "ObjectApiResponse[Any]": + es = get_connection(using or self._index._using) + return es.indices.put_template(name=self._template_name, body=self.to_dict()) + + +class ComposableIndexTemplate: + def __init__( + self, + name: str, + template: str, + index: Optional["Index"] = None, + priority: Optional[int] = None, + **kwargs: Any, + ): + if index is None: + self._index = Index(template, **kwargs) + else: + if kwargs: + raise ValueError( + "You cannot specify options for Index when" + " passing an Index instance." + ) + self._index = index.clone() + self._index._name = template + self._template_name = name + self.priority = priority + + def __getattr__(self, attr_name: str) -> Any: + return getattr(self._index, attr_name) + + def to_dict(self) -> Dict[str, Any]: + d: Dict[str, Any] = {"template": self._index.to_dict()} + d["index_patterns"] = [self._index._name] + if self.priority is not None: + d["priority"] = self.priority + return d + + def save(self, using: Optional[UsingType] = None) -> "ObjectApiResponse[Any]": + es = get_connection(using or self._index._using) + return es.indices.put_index_template(name=self._template_name, **self.to_dict()) + + +class Index(IndexBase): + _using: UsingType + + if TYPE_CHECKING: + + def get_or_create_mapping(self) -> Mapping: ... 
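The ``ComposableIndexTemplate`` wrapper above bundles an ``Index`` definition with a pattern and priority and saves it via the composable index template API. A sketch; the import is taken from the module where the class is defined in this patch (a public re-export may also exist), and the pattern, priority and settings are placeholders::

    from elasticsearch.dsl import connections
    from elasticsearch.dsl._sync.index import ComposableIndexTemplate

    connections.create_connection(hosts=["http://localhost:9200"])

    # match every index named blog-*; priority decides between overlapping templates
    template = ComposableIndexTemplate("blog", "blog-*", priority=100)
    template.settings(number_of_replicas=0)  # delegated to the wrapped Index
    template.save()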
+ + def __init__(self, name: str, using: UsingType = "default"): + """ + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + super().__init__(name, Mapping, using=using) + + def _get_connection(self, using: Optional[UsingType] = None) -> "Elasticsearch": + if self._name is None: + raise ValueError("You cannot perform API calls on the default index.") + return get_connection(using or self._using) + + connection = property(_get_connection) + + def as_template( + self, + template_name: str, + pattern: Optional[str] = None, + order: Optional[int] = None, + ) -> IndexTemplate: + return IndexTemplate( + template_name, pattern or self._name, index=self, order=order + ) + + def as_composable_template( + self, + template_name: str, + pattern: Optional[str] = None, + priority: Optional[int] = None, + ) -> ComposableIndexTemplate: + return ComposableIndexTemplate( + template_name, pattern or self._name, index=self, priority=priority + ) + + def load_mappings(self, using: Optional[UsingType] = None) -> None: + self.get_or_create_mapping().update_from_es( + self._name, using=using or self._using + ) + + def clone( + self, name: Optional[str] = None, using: Optional[UsingType] = None + ) -> Self: + """ + Create a copy of the instance with another name or connection alias. + Useful for creating multiple indices with shared configuration:: + + i = Index('base-index') + i.settings(number_of_shards=1) + i.create() + + i2 = i.clone('other-index') + i2.create() + + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + i = self.__class__(name or self._name, using=using or self._using) + i._settings = self._settings.copy() + i._aliases = self._aliases.copy() + i._analysis = self._analysis.copy() + i._doc_types = self._doc_types[:] + if self._mapping is not None: + i._mapping = self._mapping._clone() + return i + + def search(self, using: Optional[UsingType] = None) -> Search: + """ + Return a :class:`~elasticsearch.dsl.Search` object searching over the + index (or all the indices belonging to this template) and its + ``Document``\\s. + """ + return Search( + using=using or self._using, index=self._name, doc_type=self._doc_types + ) + + def updateByQuery(self, using: Optional[UsingType] = None) -> UpdateByQuery: + """ + Return a :class:`~elasticsearch.dsl.UpdateByQuery` object searching over the index + (or all the indices belonging to this template) and updating Documents that match + the search criteria. + + For more information, see here: + https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html + """ + return UpdateByQuery( + using=using or self._using, + index=self._name, + ) + + def create( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Creates the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.create`` unchanged. 
+ """ + return self._get_connection(using).indices.create( + index=self._name, body=self.to_dict(), **kwargs + ) + + def is_closed(self, using: Optional[UsingType] = None) -> bool: + state = self._get_connection(using).cluster.state( + index=self._name, metric="metadata" + ) + return bool(state["metadata"]["indices"][self._name]["state"] == "close") + + def save( + self, using: Optional[UsingType] = None + ) -> "Optional[ObjectApiResponse[Any]]": + """ + Sync the index definition with elasticsearch, creating the index if it + doesn't exist and updating its settings and mappings if it does. + + Note some settings and mapping changes cannot be done on an open + index (or at all on an existing index) and for those this method will + fail with the underlying exception. + """ + if not self.exists(using=using): + return self.create(using=using) + + body = self.to_dict() + settings = body.pop("settings", {}) + analysis = settings.pop("analysis", None) + current_settings = (self.get_settings(using=using))[self._name]["settings"][ + "index" + ] + if analysis: + if self.is_closed(using=using): + # closed index, update away + settings["analysis"] = analysis + else: + # compare analysis definition, if all analysis objects are + # already defined as requested, skip analysis update and + # proceed, otherwise raise IllegalOperation + existing_analysis = current_settings.get("analysis", {}) + if any( + existing_analysis.get(section, {}).get(k, None) + != analysis[section][k] + for section in analysis + for k in analysis[section] + ): + raise IllegalOperation( + "You cannot update analysis configuration on an open index, " + "you need to close index %s first." % self._name + ) + + # try and update the settings + if settings: + settings = settings.copy() + for k, v in list(settings.items()): + if k in current_settings and current_settings[k] == str(v): + del settings[k] + + if settings: + self.put_settings(using=using, body=settings) + + # update the mappings, any conflict in the mappings will result in an + # exception + mappings = body.pop("mappings", {}) + if mappings: + return self.put_mapping(using=using, body=mappings) + + return None + + def analyze( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Perform the analysis process on a text and return the tokens breakdown + of the text. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.analyze`` unchanged. + """ + return self._get_connection(using).indices.analyze(index=self._name, **kwargs) + + def refresh( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Performs a refresh operation on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.refresh`` unchanged. + """ + return self._get_connection(using).indices.refresh(index=self._name, **kwargs) + + def flush( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Performs a flush operation on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.flush`` unchanged. + """ + return self._get_connection(using).indices.flush(index=self._name, **kwargs) + + def get( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The get index API allows to retrieve information about the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get`` unchanged. 
+ """ + return self._get_connection(using).indices.get(index=self._name, **kwargs) + + def open( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Opens the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.open`` unchanged. + """ + return self._get_connection(using).indices.open(index=self._name, **kwargs) + + def close( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Closes the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.close`` unchanged. + """ + return self._get_connection(using).indices.close(index=self._name, **kwargs) + + def delete( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Deletes the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.delete`` unchanged. + """ + return self._get_connection(using).indices.delete(index=self._name, **kwargs) + + def exists(self, using: Optional[UsingType] = None, **kwargs: Any) -> bool: + """ + Returns ``True`` if the index already exists in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists`` unchanged. + """ + return bool( + self._get_connection(using).indices.exists(index=self._name, **kwargs) + ) + + def put_mapping( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Register specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_mapping`` unchanged. + """ + return self._get_connection(using).indices.put_mapping( + index=self._name, **kwargs + ) + + def get_mapping( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_mapping`` unchanged. + """ + return self._get_connection(using).indices.get_mapping( + index=self._name, **kwargs + ) + + def get_field_mapping( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve mapping definition of a specific field. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_field_mapping`` unchanged. + """ + return self._get_connection(using).indices.get_field_mapping( + index=self._name, **kwargs + ) + + def put_alias( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Create an alias for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_alias`` unchanged. + """ + return self._get_connection(using).indices.put_alias(index=self._name, **kwargs) + + def exists_alias(self, using: Optional[UsingType] = None, **kwargs: Any) -> bool: + """ + Return a boolean indicating whether given alias exists for this index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists_alias`` unchanged. + """ + return bool( + self._get_connection(using).indices.exists_alias(index=self._name, **kwargs) + ) + + def get_alias( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve a specified alias. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_alias`` unchanged. 
+ """ + return self._get_connection(using).indices.get_alias(index=self._name, **kwargs) + + def delete_alias( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Delete specific alias. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.delete_alias`` unchanged. + """ + return self._get_connection(using).indices.delete_alias( + index=self._name, **kwargs + ) + + def get_settings( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve settings for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_settings`` unchanged. + """ + return self._get_connection(using).indices.get_settings( + index=self._name, **kwargs + ) + + def put_settings( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Change specific index level settings in real time. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_settings`` unchanged. + """ + return self._get_connection(using).indices.put_settings( + index=self._name, **kwargs + ) + + def stats( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Retrieve statistics on different operations happening on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.stats`` unchanged. + """ + return self._get_connection(using).indices.stats(index=self._name, **kwargs) + + def segments( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Provide low level segments information that a Lucene index (shard + level) is built with. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.segments`` unchanged. + """ + return self._get_connection(using).indices.segments(index=self._name, **kwargs) + + def validate_query( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Validate a potentially expensive query without executing it. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.validate_query`` unchanged. + """ + return self._get_connection(using).indices.validate_query( + index=self._name, **kwargs + ) + + def clear_cache( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Clear all caches or specific cached associated with the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.clear_cache`` unchanged. + """ + return self._get_connection(using).indices.clear_cache( + index=self._name, **kwargs + ) + + def recovery( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The indices recovery API provides insight into on-going shard + recoveries for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.recovery`` unchanged. + """ + return self._get_connection(using).indices.recovery(index=self._name, **kwargs) + + def shard_stores( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + Provides store information for shard copies of the index. Store + information reports on which nodes shard copies exist, the shard copy + version, indicating how recent they are, and any exceptions encountered + while opening the shard index or from earlier engine failure. 
+ + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.shard_stores`` unchanged. + """ + return self._get_connection(using).indices.shard_stores( + index=self._name, **kwargs + ) + + def forcemerge( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The force merge API allows to force merging of the index through an + API. The merge relates to the number of segments a Lucene index holds + within each shard. The force merge operation allows to reduce the + number of segments by merging them. + + This call will block until the merge is complete. If the http + connection is lost, the request will continue in the background, and + any new requests will block until the previous force merge is complete. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.forcemerge`` unchanged. + """ + return self._get_connection(using).indices.forcemerge( + index=self._name, **kwargs + ) + + def shrink( + self, using: Optional[UsingType] = None, **kwargs: Any + ) -> "ObjectApiResponse[Any]": + """ + The shrink index API allows you to shrink an existing index into a new + index with fewer primary shards. The number of primary shards in the + target index must be a factor of the shards in the source index. For + example an index with 8 primary shards can be shrunk into 4, 2 or 1 + primary shards or an index with 15 primary shards can be shrunk into 5, + 3 or 1. If the number of shards in the index is a prime number it can + only be shrunk into a single primary shard. Before shrinking, a + (primary or replica) copy of every shard in the index must be present + on the same node. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.shrink`` unchanged. + """ + return self._get_connection(using).indices.shrink(index=self._name, **kwargs) diff --git a/elasticsearch/dsl/_sync/mapping.py b/elasticsearch/dsl/_sync/mapping.py new file mode 100644 index 000000000..4ee0f282a --- /dev/null +++ b/elasticsearch/dsl/_sync/mapping.py @@ -0,0 +1,49 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
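The ``forcemerge()`` and ``shrink()`` wrappers above forward directly to the corresponding indices APIs, so the usual client parameters apply. A hedged sketch; the index names and the ``max_num_segments``/``target`` parameters follow the elasticsearch-py client and are not defined in this patch::

    from elasticsearch.dsl import Index, connections

    connections.create_connection(hosts=["http://localhost:9200"])

    logs = Index("logs-2025.01")

    # merge each shard down to a single segment once writes have stopped
    logs.forcemerge(max_num_segments=1)

    # shrink to a factor of the source shard count, e.g. 8 -> 1; the source
    # index must be made read-only and relocated to one node first (not shown)
    logs.shrink(target="logs-2025.01-shrunk")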
+ +from typing import List, Optional, Union + +from typing_extensions import Self + +from ..connections import get_connection +from ..mapping_base import MappingBase +from ..utils import UsingType + + +class Mapping(MappingBase): + @classmethod + def from_es( + cls, index: Optional[Union[str, List[str]]], using: UsingType = "default" + ) -> Self: + m = cls() + m.update_from_es(index, using) + return m + + def update_from_es( + self, index: Optional[Union[str, List[str]]], using: UsingType = "default" + ) -> None: + es = get_connection(using) + raw = es.indices.get_mapping(index=index) + _, raw = raw.popitem() + self._update_from_dict(raw["mappings"]) + + def save(self, index: str, using: UsingType = "default") -> None: + from .index import Index + + i = Index(index, using=using) + i.mapping(self) + i.save() diff --git a/elasticsearch/dsl/_sync/search.py b/elasticsearch/dsl/_sync/search.py new file mode 100644 index 000000000..f3e028347 --- /dev/null +++ b/elasticsearch/dsl/_sync/search.py @@ -0,0 +1,217 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import contextlib +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, cast + +from elasticsearch.exceptions import ApiError +from elasticsearch.helpers import scan +from typing_extensions import Self + +from ..connections import get_connection +from ..response import Response +from ..search_base import MultiSearchBase, SearchBase +from ..utils import _R, AttrDict, UsingType + + +class Search(SearchBase[_R]): + _using: UsingType + + def __iter__(self) -> Iterator[_R]: + """ + Iterate over the hits. + """ + + class ResultsIterator(Iterator[_R]): + def __init__(self, search: Search[_R]): + self.search = search + self.iterator: Optional[Iterator[_R]] = None + + def __next__(self) -> _R: + if self.iterator is None: + self.iterator = iter(self.search.execute()) + try: + return next(self.iterator) + except StopIteration: + raise StopIteration() + + return ResultsIterator(self) + + def count(self) -> int: + """ + Return the number of hits matching the query and filters. Note that + only the actual number is returned. + """ + if hasattr(self, "_response") and self._response.hits.total.relation == "eq": # type: ignore[attr-defined] + return cast(int, self._response.hits.total.value) # type: ignore[attr-defined] + + es = get_connection(self._using) + + d = self.to_dict(count=True) + # TODO: failed shards detection + resp = es.count( + index=self._index, + query=cast(Optional[Dict[str, Any]], d.get("query", None)), + **self._params, + ) + + return cast(int, resp["count"]) + + def execute(self, ignore_cache: bool = False) -> Response[_R]: + """ + Execute the search and return an instance of ``Response`` wrapping all + the data. 
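The synchronous ``Mapping`` class above can pull an existing mapping from the cluster or push one to it. A sketch; the ``field()`` builder comes from the shared mapping base and is assumed here, and the index and field names are placeholders::

    from elasticsearch.dsl import Mapping, connections

    connections.create_connection(hosts=["http://localhost:9200"])

    # read the current mapping of an existing index
    current = Mapping.from_es("blog-posts")

    # or build one by hand and write it out
    m = Mapping()
    m.field("title", "text")
    m.field("published", "date")
    m.save("blog-posts")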
+ + :arg ignore_cache: if set to ``True``, consecutive calls will hit + ES, while cached result will be ignored. Defaults to `False` + """ + if ignore_cache or not hasattr(self, "_response"): + es = get_connection(self._using) + + self._response = self._response_class( + self, + ( + es.search(index=self._index, body=self.to_dict(), **self._params) + ).body, + ) + return self._response + + def scan(self) -> Iterator[_R]: + """ + Turn the search into a scan search and return a generator that will + iterate over all the documents matching the query. + + Use ``params`` method to specify any additional arguments you with to + pass to the underlying ``scan`` helper from ``elasticsearch-py`` - + https://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.scan + + The ``iterate()`` method should be preferred, as it provides similar + functionality using an Elasticsearch point in time. + """ + es = get_connection(self._using) + + for hit in scan(es, query=self.to_dict(), index=self._index, **self._params): + yield self._get_result(cast(AttrDict[Any], hit)) + + def delete(self) -> AttrDict[Any]: + """ + delete() executes the query by delegating to delete_by_query() + """ + + es = get_connection(self._using) + assert self._index is not None + + return AttrDict( + cast( + Dict[str, Any], + es.delete_by_query( + index=self._index, body=self.to_dict(), **self._params + ), + ) + ) + + @contextlib.contextmanager + def point_in_time(self, keep_alive: str = "1m") -> Iterator[Self]: + """ + Open a point in time (pit) that can be used across several searches. + + This method implements a context manager that returns a search object + configured to operate within the created pit. + + :arg keep_alive: the time to live for the point in time, renewed with each search request + """ + es = get_connection(self._using) + + pit = es.open_point_in_time(index=self._index or "*", keep_alive=keep_alive) + search = self.index().extra(pit={"id": pit["id"], "keep_alive": keep_alive}) + if not search._sort: + search = search.sort("_shard_doc") + yield search + es.close_point_in_time(id=pit["id"]) + + def iterate(self, keep_alive: str = "1m") -> Iterator[_R]: + """ + Return a generator that iterates over all the documents matching the query. + + This method uses a point in time to provide consistent results even when + the index is changing. It should be preferred over ``scan()``. + + :arg keep_alive: the time to live for the point in time, renewed with each new search request + """ + with self.point_in_time(keep_alive=keep_alive) as s: + while True: + r = s.execute() + for hit in r: + yield hit + if len(r.hits) == 0: + break + s = s.search_after() + + +class MultiSearch(MultiSearchBase[_R]): + """ + Combine multiple :class:`~elasticsearch.dsl.Search` objects into a single + request. + """ + + _using: UsingType + + if TYPE_CHECKING: + + def add(self, search: Search[_R]) -> Self: ... # type: ignore[override] + + def execute( + self, ignore_cache: bool = False, raise_on_error: bool = True + ) -> List[Response[_R]]: + """ + Execute the multi search request and return a list of search results. 
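``Search.delete()`` above delegates to the delete-by-query API, removing every document that matches the current query. A sketch; the ``query()`` builder from the shared search base and the field names are assumptions::

    from elasticsearch.dsl import Search, connections

    connections.create_connection(hosts=["http://localhost:9200"])

    # remove all matching documents in one delete-by-query request
    result = Search(index="blog-posts").query("match", category="spam").delete()
    print(result.deleted, "documents removed")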
+ """ + if ignore_cache or not hasattr(self, "_response"): + es = get_connection(self._using) + + responses = es.msearch( + index=self._index, body=self.to_dict(), **self._params + ) + + out: List[Response[_R]] = [] + for s, r in zip(self._searches, responses["responses"]): + if r.get("error", False): + if raise_on_error: + raise ApiError("N/A", meta=responses.meta, body=r) + r = None + else: + r = Response(s, r) + out.append(r) + + self._response = out + + return self._response + + +class EmptySearch(Search[_R]): + def count(self) -> int: + return 0 + + def execute(self, ignore_cache: bool = False) -> Response[_R]: + return self._response_class(self, {"hits": {"total": 0, "hits": []}}) + + def scan(self) -> Iterator[_R]: + return + yield # a bit strange, but this forces an empty generator function + + def delete(self) -> AttrDict[Any]: + return AttrDict[Any]({}) diff --git a/elasticsearch/dsl/_sync/update_by_query.py b/elasticsearch/dsl/_sync/update_by_query.py new file mode 100644 index 000000000..0caecc029 --- /dev/null +++ b/elasticsearch/dsl/_sync/update_by_query.py @@ -0,0 +1,45 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import TYPE_CHECKING + +from ..connections import get_connection +from ..update_by_query_base import UpdateByQueryBase +from ..utils import _R, UsingType + +if TYPE_CHECKING: + from ..response import UpdateByQueryResponse + + +class UpdateByQuery(UpdateByQueryBase[_R]): + _using: UsingType + + def execute(self) -> "UpdateByQueryResponse[_R]": + """ + Execute the search and return an instance of ``Response`` wrapping all + the data. + """ + es = get_connection(self._using) + assert self._index is not None + + self._response = self._response_class( + self, + ( + es.update_by_query(index=self._index, **self.to_dict(), **self._params) + ).body, + ) + return self._response diff --git a/elasticsearch/dsl/aggs.py b/elasticsearch/dsl/aggs.py new file mode 100644 index 000000000..6175027d7 --- /dev/null +++ b/elasticsearch/dsl/aggs.py @@ -0,0 +1,3731 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import collections.abc +from copy import deepcopy +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Dict, + Generic, + Iterable, + Literal, + Mapping, + MutableMapping, + Optional, + Sequence, + Union, + cast, +) + +from elastic_transport.client_utils import DEFAULT + +from .query import Query +from .response.aggs import AggResponse, BucketData, FieldBucketData, TopHitsData +from .utils import _R, AttrDict, DslBase + +if TYPE_CHECKING: + from elastic_transport.client_utils import DefaultType + + from . import types + + from .document_base import InstrumentedField + from .search_base import SearchBase + + +def A( + name_or_agg: Union[MutableMapping[str, Any], "Agg[_R]", str], + filter: Optional[Union[str, "Query"]] = None, + **params: Any, +) -> "Agg[_R]": + if filter is not None: + if name_or_agg != "filter": + raise ValueError( + "Aggregation %r doesn't accept positional argument 'filter'." + % name_or_agg + ) + params["filter"] = filter + + # {"terms": {"field": "tags"}, "aggs": {...}} + if isinstance(name_or_agg, collections.abc.MutableMapping): + if params: + raise ValueError("A() cannot accept parameters when passing in a dict.") + # copy to avoid modifying in-place + agg = deepcopy(name_or_agg) + # pop out nested aggs + aggs = agg.pop("aggs", None) + # pop out meta data + meta = agg.pop("meta", None) + # should be {"terms": {"field": "tags"}} + if len(agg) != 1: + raise ValueError( + 'A() can only accept dict with an aggregation ({"terms": {...}}). ' + "Instead it got (%r)" % name_or_agg + ) + agg_type, params = agg.popitem() + if aggs: + params = params.copy() + params["aggs"] = aggs + if meta: + params = params.copy() + params["meta"] = meta + return Agg[_R].get_dsl_class(agg_type)(_expand__to_dot=False, **params) + + # Terms(...) just return the nested agg + elif isinstance(name_or_agg, Agg): + if params: + raise ValueError( + "A() cannot accept parameters when passing in an Agg object." 
+ ) + return name_or_agg + + # "terms", field="tags" + return Agg[_R].get_dsl_class(name_or_agg)(**params) + + +class Agg(DslBase, Generic[_R]): + _type_name = "agg" + _type_shortcut = staticmethod(A) + name = "" + + def __contains__(self, key: str) -> bool: + return False + + def to_dict(self) -> Dict[str, Any]: + d = super().to_dict() + if isinstance(d[self.name], dict): + n = cast(Dict[str, Any], d[self.name]) + if "meta" in n: + d["meta"] = n.pop("meta") + return d + + def result(self, search: "SearchBase[_R]", data: Dict[str, Any]) -> AttrDict[Any]: + return AggResponse[_R](self, search, data) + + +class AggBase(Generic[_R]): + aggs: Dict[str, Agg[_R]] + _base: Agg[_R] + _params: Dict[str, Any] + _param_defs: ClassVar[Dict[str, Any]] = { + "aggs": {"type": "agg", "hash": True}, + } + + def __contains__(self, key: str) -> bool: + return key in self._params.get("aggs", {}) + + def __getitem__(self, agg_name: str) -> Agg[_R]: + agg = cast( + Agg[_R], self._params.setdefault("aggs", {})[agg_name] + ) # propagate KeyError + + # make sure we're not mutating a shared state - whenever accessing a + # bucket, return a shallow copy of it to be safe + if isinstance(agg, Bucket): + agg = A(agg.name, **agg._params) + # be sure to store the copy so any modifications to it will affect us + self._params["aggs"][agg_name] = agg + + return agg + + def __setitem__(self, agg_name: str, agg: Agg[_R]) -> None: + self.aggs[agg_name] = A(agg) + + def __iter__(self) -> Iterable[str]: + return iter(self.aggs) + + def _agg( + self, + bucket: bool, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> Agg[_R]: + agg = self[name] = A(agg_type, *args, **params) + + # For chaining - when creating new buckets return them... + if bucket: + return agg + # otherwise return self._base so we can keep chaining + else: + return self._base + + def metric( + self, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> Agg[_R]: + return self._agg(False, name, agg_type, *args, **params) + + def bucket( + self, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> "Bucket[_R]": + return cast("Bucket[_R]", self._agg(True, name, agg_type, *args, **params)) + + def pipeline( + self, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> "Pipeline[_R]": + return cast("Pipeline[_R]", self._agg(False, name, agg_type, *args, **params)) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return BucketData(self, search, data) # type: ignore + + +class Bucket(AggBase[_R], Agg[_R]): + def __init__(self, **params: Any): + super().__init__(**params) + # remember self for chaining + self._base = self + + def to_dict(self) -> Dict[str, Any]: + d = super(AggBase, self).to_dict() + if isinstance(d[self.name], dict): + n = cast(AttrDict[Any], d[self.name]) + if "aggs" in n: + d["aggs"] = n.pop("aggs") + return d + + +class Pipeline(Agg[_R]): + pass + + +class AdjacencyMatrix(Bucket[_R]): + """ + A bucket aggregation returning a form of adjacency matrix. The request + provides a collection of named filter expressions, similar to the + `filters` aggregation. Each bucket in the response represents a non- + empty cell in the matrix of intersecting filters. + + :arg filters: Filters used to create buckets. At least one filter is + required. + :arg separator: Separator used to concatenate filter names. Defaults + to &. 
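[editor's note] A small sketch of the ``bucket()`` / ``metric()`` chaining implemented above, attached to a search; the ``git`` index and field names are placeholders.

from elasticsearch.dsl import Search

s = Search(index="git")

# bucket() returns the new bucket so metrics can be nested under it;
# metric() returns the enclosing bucket, which keeps the chain going
s.aggs.bucket("per_author", "terms", field="author.name.keyword") \
      .metric("total_lines", "sum", field="stats.lines") \
      .metric("files_touched", "cardinality", field="files")

print(s.to_dict())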
+ """ + + name = "adjacency_matrix" + _param_defs = { + "filters": {"type": "query", "hash": True}, + } + + def __init__( + self, + *, + filters: Union[Mapping[str, Query], "DefaultType"] = DEFAULT, + separator: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(filters=filters, separator=separator, **kwargs) + + +class AutoDateHistogram(Bucket[_R]): + """ + A multi-bucket aggregation similar to the date histogram, except + instead of providing an interval to use as the width of each bucket, a + target number of buckets is provided. + + :arg buckets: The target number of buckets. Defaults to `10` if + omitted. + :arg field: The field on which to run the aggregation. + :arg format: The date format used to format `key_as_string` in the + response. If no `format` is specified, the first date format + specified in the field mapping is used. + :arg minimum_interval: The minimum rounding interval. This can make + the collection process more efficient, as the aggregation will not + attempt to round at any interval lower than `minimum_interval`. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg offset: Time zone specified as a ISO 8601 UTC offset. + :arg params: + :arg script: + :arg time_zone: Time zone ID. + """ + + name = "auto_date_histogram" + + def __init__( + self, + *, + buckets: Union[int, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + minimum_interval: Union[ + Literal["second", "minute", "hour", "day", "month", "year"], "DefaultType" + ] = DEFAULT, + missing: Any = DEFAULT, + offset: Union[str, "DefaultType"] = DEFAULT, + params: Union[Mapping[str, Any], "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + time_zone: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + buckets=buckets, + field=field, + format=format, + minimum_interval=minimum_interval, + missing=missing, + offset=offset, + params=params, + script=script, + time_zone=time_zone, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return FieldBucketData(self, search, data) + + +class Avg(Agg[_R]): + """ + A single-value metrics aggregation that computes the average of + numeric values that are extracted from the aggregated documents. + + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "avg" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, field=field, missing=missing, script=script, **kwargs + ) + + +class AvgBucket(Pipeline[_R]): + """ + A sibling pipeline aggregation which calculates the mean value of a + specified metric in a sibling aggregation. The specified metric must + be numeric and the sibling aggregation must be a multi-bucket + aggregation. + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. 
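[editor's note] The generated classes such as ``AutoDateHistogram`` and ``Avg`` above can also be instantiated directly and attached by name, which is equivalent to going through ``A()``; the ``commits`` index and field names below are invented.

from elasticsearch.dsl import Search, aggs

s = Search(index="commits")
s.aggs["commits_over_time"] = aggs.AutoDateHistogram(field="committed_date", buckets=20)
s.aggs["avg_files"] = aggs.Avg(field="stats.files")
print(s.to_dict())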
+ :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "avg_bucket" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class Boxplot(Agg[_R]): + """ + A metrics aggregation that computes a box plot of numeric values + extracted from the aggregated documents. + + :arg compression: Limits the maximum number of nodes used by the + underlying TDigest algorithm to `20 * compression`, enabling + control of memory usage and approximation error. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "boxplot" + + def __init__( + self, + *, + compression: Union[float, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + compression=compression, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class BucketScript(Pipeline[_R]): + """ + A parent pipeline aggregation which runs a script which can perform + per bucket computations on metrics in the parent multi-bucket + aggregation. + + :arg script: The script to run for this aggregation. + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "bucket_script" + + def __init__( + self, + *, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + script=script, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class BucketSelector(Pipeline[_R]): + """ + A parent pipeline aggregation which runs a script to determine whether + the current bucket will be retained in the parent multi-bucket + aggregation. + + :arg script: The script to run for this aggregation. + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. 
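[editor's note] A sketch of a parent pipeline such as ``bucket_script`` defined above; the ``sales`` index, fields and script are illustrative.

from elasticsearch.dsl import Search

s = Search(index="sales")
monthly = s.aggs.bucket("per_month", "date_histogram",
                        field="date", calendar_interval="month")
monthly.metric("total", "sum", field="price")
monthly.metric("vat", "sum", field="vat")
# derive a per-bucket value from the two metrics above
monthly.pipeline("vat_ratio", "bucket_script",
                 buckets_path={"vat": "vat", "total": "total"},
                 script={"source": "params.vat / params.total"})
print(s.to_dict())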
+ """ + + name = "bucket_selector" + + def __init__( + self, + *, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + script=script, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class BucketSort(Bucket[_R]): + """ + A parent pipeline aggregation which sorts the buckets of its parent + multi-bucket aggregation. + + :arg from: Buckets in positions prior to `from` will be truncated. + :arg gap_policy: The policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg size: The number of buckets to return. Defaults to all buckets of + the parent aggregation. + :arg sort: The list of fields to sort on. + """ + + name = "bucket_sort" + + def __init__( + self, + *, + from_: Union[int, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + sort: Union[ + Union[Union[str, "InstrumentedField"], "types.SortOptions"], + Sequence[Union[Union[str, "InstrumentedField"], "types.SortOptions"]], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + from_=from_, gap_policy=gap_policy, size=size, sort=sort, **kwargs + ) + + +class BucketCountKsTest(Pipeline[_R]): + """ + A sibling pipeline aggregation which runs a two sample + Kolmogorov–Smirnov test ("K-S test") against a provided distribution + and the distribution implied by the documents counts in the configured + sibling aggregation. + + :arg alternative: A list of string values indicating which K-S test + alternative to calculate. The valid values are: "greater", "less", + "two_sided". This parameter is key for determining the K-S + statistic used when calculating the K-S test. Default value is all + possible alternative hypotheses. + :arg fractions: A list of doubles indicating the distribution of the + samples with which to compare to the `buckets_path` results. In + typical usage this is the overall proportion of documents in each + bucket, which is compared with the actual document proportions in + each bucket from the sibling aggregation counts. The default is to + assume that overall documents are uniformly distributed on these + buckets, which they would be if one used equal percentiles of a + metric to define the bucket end points. + :arg sampling_method: Indicates the sampling methodology when + calculating the K-S test. Note, this is sampling of the returned + values. This determines the cumulative distribution function (CDF) + points used comparing the two samples. Default is `upper_tail`, + which emphasizes the upper end of the CDF points. Valid options + are: `upper_tail`, `uniform`, and `lower_tail`. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. 
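[editor's note] ``bucket_selector`` and ``bucket_sort``, defined above, can then prune and order the parent's buckets; a sketch under the same assumed ``sales`` index.

from elasticsearch.dsl import Search

s = Search(index="sales")
monthly = s.aggs.bucket("per_month", "date_histogram",
                        field="date", calendar_interval="month")
monthly.metric("total", "sum", field="price")
# keep only months with more than 1000 in sales ...
monthly.pipeline("big_months", "bucket_selector",
                 buckets_path={"total": "total"},
                 script={"source": "params.total > 1000"})
# ... and return the top three of those, largest first
monthly.bucket("top_months", "bucket_sort",
               sort=[{"total": {"order": "desc"}}], size=3)
print(s.to_dict())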
+ """ + + name = "bucket_count_ks_test" + + def __init__( + self, + *, + alternative: Union[Sequence[str], "DefaultType"] = DEFAULT, + fractions: Union[Sequence[float], "DefaultType"] = DEFAULT, + sampling_method: Union[str, "DefaultType"] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + alternative=alternative, + fractions=fractions, + sampling_method=sampling_method, + buckets_path=buckets_path, + **kwargs, + ) + + +class BucketCorrelation(Pipeline[_R]): + """ + A sibling pipeline aggregation which runs a correlation function on + the configured sibling multi-bucket aggregation. + + :arg function: (required) The correlation function to execute. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "bucket_correlation" + + def __init__( + self, + *, + function: Union[ + "types.BucketCorrelationFunction", Dict[str, Any], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__(function=function, buckets_path=buckets_path, **kwargs) + + +class Cardinality(Agg[_R]): + """ + A single-value metrics aggregation that calculates an approximate + count of distinct values. + + :arg precision_threshold: A unique count below which counts are + expected to be close to accurate. This allows to trade memory for + accuracy. Defaults to `3000` if omitted. + :arg rehash: + :arg execution_hint: Mechanism by which cardinality aggregations is + run. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "cardinality" + + def __init__( + self, + *, + precision_threshold: Union[int, "DefaultType"] = DEFAULT, + rehash: Union[bool, "DefaultType"] = DEFAULT, + execution_hint: Union[ + Literal[ + "global_ordinals", + "segment_ordinals", + "direct", + "save_memory_heuristic", + "save_time_heuristic", + ], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + precision_threshold=precision_threshold, + rehash=rehash, + execution_hint=execution_hint, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class CategorizeText(Bucket[_R]): + """ + A multi-bucket aggregation that groups semi-structured text into + buckets. + + :arg field: (required) The semi-structured text field to categorize. + :arg max_unique_tokens: The maximum number of unique tokens at any + position up to max_matched_tokens. Must be larger than 1. Smaller + values use less memory and create fewer categories. Larger values + will use more memory and create narrower categories. Max allowed + value is 100. Defaults to `50` if omitted. + :arg max_matched_tokens: The maximum number of token positions to + match on before attempting to merge categories. Larger values will + use more memory and create narrower categories. Max allowed value + is 100. Defaults to `5` if omitted. + :arg similarity_threshold: The minimum percentage of tokens that must + match for text to be added to the category bucket. Must be between + 1 and 100. The larger the value the narrower the categories. 
+ Larger values will increase memory usage and create narrower + categories. Defaults to `50` if omitted. + :arg categorization_filters: This property expects an array of regular + expressions. The expressions are used to filter out matching + sequences from the categorization field values. You can use this + functionality to fine tune the categorization by excluding + sequences from consideration when categories are defined. For + example, you can exclude SQL statements that appear in your log + files. This property cannot be used at the same time as + categorization_analyzer. If you only want to define simple regular + expression filters that are applied prior to tokenization, setting + this property is the easiest method. If you also want to customize + the tokenizer or post-tokenization filtering, use the + categorization_analyzer property instead and include the filters + as pattern_replace character filters. + :arg categorization_analyzer: The categorization analyzer specifies + how the text is analyzed and tokenized before being categorized. + The syntax is very similar to that used to define the analyzer in + the [Analyze endpoint](https://www.elastic.co/guide/en/elasticsear + ch/reference/8.0/indices-analyze.html). This property cannot be + used at the same time as categorization_filters. + :arg shard_size: The number of categorization buckets to return from + each shard before merging all the results. + :arg size: The number of buckets to return. Defaults to `10` if + omitted. + :arg min_doc_count: The minimum number of documents in a bucket to be + returned to the results. + :arg shard_min_doc_count: The minimum number of documents in a bucket + to be returned from the shard before merging. + """ + + name = "categorize_text" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + max_unique_tokens: Union[int, "DefaultType"] = DEFAULT, + max_matched_tokens: Union[int, "DefaultType"] = DEFAULT, + similarity_threshold: Union[int, "DefaultType"] = DEFAULT, + categorization_filters: Union[Sequence[str], "DefaultType"] = DEFAULT, + categorization_analyzer: Union[ + str, "types.CustomCategorizeTextAnalyzer", Dict[str, Any], "DefaultType" + ] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + max_unique_tokens=max_unique_tokens, + max_matched_tokens=max_matched_tokens, + similarity_threshold=similarity_threshold, + categorization_filters=categorization_filters, + categorization_analyzer=categorization_analyzer, + shard_size=shard_size, + size=size, + min_doc_count=min_doc_count, + shard_min_doc_count=shard_min_doc_count, + **kwargs, + ) + + +class Children(Bucket[_R]): + """ + A single bucket aggregation that selects child documents that have the + specified type, as defined in a `join` field. + + :arg type: The child type that should be selected. + """ + + name = "children" + + def __init__(self, type: Union[str, "DefaultType"] = DEFAULT, **kwargs: Any): + super().__init__(type=type, **kwargs) + + +class Composite(Bucket[_R]): + """ + A multi-bucket aggregation that creates composite buckets from + different sources. Unlike the other multi-bucket aggregations, you can + use the `composite` aggregation to paginate *all* buckets from a + multi-level aggregation efficiently. 
+ + :arg after: When paginating, use the `after_key` value returned in the + previous response to retrieve the next page. + :arg size: The number of composite buckets that should be returned. + Defaults to `10` if omitted. + :arg sources: The value sources used to build composite buckets. Keys + are returned in the order of the `sources` definition. + """ + + name = "composite" + + def __init__( + self, + *, + after: Union[ + Mapping[ + Union[str, "InstrumentedField"], Union[int, float, str, bool, None, Any] + ], + "DefaultType", + ] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + sources: Union[Sequence[Mapping[str, Agg[_R]]], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(after=after, size=size, sources=sources, **kwargs) + + +class CumulativeCardinality(Pipeline[_R]): + """ + A parent pipeline aggregation which calculates the cumulative + cardinality in a parent `histogram` or `date_histogram` aggregation. + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "cumulative_cardinality" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class CumulativeSum(Pipeline[_R]): + """ + A parent pipeline aggregation which calculates the cumulative sum of a + specified metric in a parent `histogram` or `date_histogram` + aggregation. + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "cumulative_sum" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class DateHistogram(Bucket[_R]): + """ + A multi-bucket values source based aggregation that can be applied on + date values or date range values extracted from the documents. It + dynamically builds fixed size (interval) buckets over the values. + + :arg calendar_interval: Calendar-aware interval. Can be specified + using the unit name, such as `month`, or as a single unit + quantity, such as `1M`. + :arg extended_bounds: Enables extending the bounds of the histogram + beyond the data itself. + :arg hard_bounds: Limits the histogram to specified bounds. + :arg field: The date field whose values are use to build a histogram. + :arg fixed_interval: Fixed intervals: a fixed number of SI units and + never deviate, regardless of where they fall on the calendar. 
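[editor's note] A paging sketch for the ``composite`` aggregation defined above, loosely following the pattern of ``examples/dsl/composite_agg.py`` in this patch; a default connection and a hypothetical ``git`` index with a ``tags`` field are assumed.

from elasticsearch.dsl import A, Search

def scan_tag_buckets(index="git", page_size=100):
    """Yield every 'tags' bucket, one composite page at a time."""
    after = None
    while True:
        s = Search(index=index)[:0]  # no hits, aggregations only
        kwargs = {"after": after} if after else {}
        s.aggs.bucket("tag_page", "composite",
                      sources=[{"tags": A("terms", field="tags")}],
                      size=page_size, **kwargs)
        response = s.execute()
        buckets = response.aggregations.tag_page.buckets
        if not buckets:
            return
        yield from buckets
        after = response.aggregations.tag_page.after_key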
+ :arg format: The date format used to format `key_as_string` in the + response. If no `format` is specified, the first date format + specified in the field mapping is used. + :arg interval: + :arg min_doc_count: Only returns buckets that have `min_doc_count` + number of documents. By default, all buckets between the first + bucket that matches documents and the last one are returned. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg offset: Changes the start value of each bucket by the specified + positive (`+`) or negative offset (`-`) duration. + :arg order: The sort order of the returned buckets. + :arg params: + :arg script: + :arg time_zone: Time zone used for bucketing and rounding. Defaults to + Coordinated Universal Time (UTC). + :arg keyed: Set to `true` to associate a unique string key with each + bucket and return the ranges as a hash rather than an array. + """ + + name = "date_histogram" + + def __init__( + self, + *, + calendar_interval: Union[ + Literal[ + "second", "minute", "hour", "day", "week", "month", "quarter", "year" + ], + "DefaultType", + ] = DEFAULT, + extended_bounds: Union[ + "types.ExtendedBounds", Dict[str, Any], "DefaultType" + ] = DEFAULT, + hard_bounds: Union[ + "types.ExtendedBounds", Dict[str, Any], "DefaultType" + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + fixed_interval: Any = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + interval: Any = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + missing: Any = DEFAULT, + offset: Any = DEFAULT, + order: Union[ + Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]], + Sequence[Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]]], + "DefaultType", + ] = DEFAULT, + params: Union[Mapping[str, Any], "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + time_zone: Union[str, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + calendar_interval=calendar_interval, + extended_bounds=extended_bounds, + hard_bounds=hard_bounds, + field=field, + fixed_interval=fixed_interval, + format=format, + interval=interval, + min_doc_count=min_doc_count, + missing=missing, + offset=offset, + order=order, + params=params, + script=script, + time_zone=time_zone, + keyed=keyed, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return FieldBucketData(self, search, data) + + +class DateRange(Bucket[_R]): + """ + A multi-bucket value source based aggregation that enables the user to + define a set of date ranges - each representing a bucket. + + :arg field: The date field whose values are use to build ranges. + :arg format: The date format used to format `from` and `to` in the + response. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg ranges: Array of date ranges. + :arg time_zone: Time zone used to convert dates from another time zone + to UTC. + :arg keyed: Set to `true` to associate a unique string key with each + bucket and returns the ranges as a hash rather than an array. 
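[editor's note] A compact sketch of the ``date_histogram`` bucket defined above; the ``blogs`` index and ``published_at`` field are invented.

from elasticsearch.dsl import Search

s = Search(index="blogs")
s.aggs.bucket("per_month", "date_histogram",
              field="published_at", calendar_interval="month",
              min_doc_count=1, format="yyyy-MM")
# after executing, each bucket exposes .key_as_string and .doc_count
print(s.to_dict())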
+ """ + + name = "date_range" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + ranges: Union[ + Sequence["types.DateRangeExpression"], + Sequence[Dict[str, Any]], + "DefaultType", + ] = DEFAULT, + time_zone: Union[str, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + format=format, + missing=missing, + ranges=ranges, + time_zone=time_zone, + keyed=keyed, + **kwargs, + ) + + +class Derivative(Pipeline[_R]): + """ + A parent pipeline aggregation which calculates the derivative of a + specified metric in a parent `histogram` or `date_histogram` + aggregation. + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "derivative" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class DiversifiedSampler(Bucket[_R]): + """ + A filtering aggregation used to limit any sub aggregations' processing + to a sample of the top-scoring documents. Similar to the `sampler` + aggregation, but adds the ability to limit the number of matches that + share a common value. + + :arg execution_hint: The type of value used for de-duplication. + Defaults to `global_ordinals` if omitted. + :arg max_docs_per_value: Limits how many documents are permitted per + choice of de-duplicating value. Defaults to `1` if omitted. + :arg script: + :arg shard_size: Limits how many top-scoring documents are collected + in the sample processed on each shard. Defaults to `100` if + omitted. + :arg field: The field used to provide values used for de-duplication. + """ + + name = "diversified_sampler" + + def __init__( + self, + *, + execution_hint: Union[ + Literal["map", "global_ordinals", "bytes_hash"], "DefaultType" + ] = DEFAULT, + max_docs_per_value: Union[int, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + execution_hint=execution_hint, + max_docs_per_value=max_docs_per_value, + script=script, + shard_size=shard_size, + field=field, + **kwargs, + ) + + +class ExtendedStats(Agg[_R]): + """ + A multi-value metrics aggregation that computes stats over numeric + values extracted from the aggregated documents. + + :arg sigma: The number of standard deviations above/below the mean to + display. + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. 
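[editor's note] ``derivative``, defined above, is a parent pipeline, so it lives inside the histogram whose metric it differentiates; the index and field names are made up.

from elasticsearch.dsl import Search

s = Search(index="sales")
daily = s.aggs.bucket("per_day", "date_histogram",
                      field="date", calendar_interval="day")
daily.metric("total", "sum", field="price")
daily.pipeline("sales_change", "derivative", buckets_path="total")
print(s.to_dict())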
+ :arg script: + """ + + name = "extended_stats" + + def __init__( + self, + *, + sigma: Union[float, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + sigma=sigma, + format=format, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class ExtendedStatsBucket(Pipeline[_R]): + """ + A sibling pipeline aggregation which calculates a variety of stats + across all bucket of a specified metric in a sibling aggregation. + + :arg sigma: The number of standard deviations above/below the mean to + display. + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "extended_stats_bucket" + + def __init__( + self, + *, + sigma: Union[float, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + sigma=sigma, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class FrequentItemSets(Agg[_R]): + """ + A bucket aggregation which finds frequent item sets, a form of + association rules mining that identifies items that often occur + together. + + :arg fields: (required) Fields to analyze. + :arg minimum_set_size: The minimum size of one item set. Defaults to + `1` if omitted. + :arg minimum_support: The minimum support of one item set. Defaults to + `0.1` if omitted. + :arg size: The number of top item sets to return. Defaults to `10` if + omitted. + :arg filter: Query that filters documents from analysis. + """ + + name = "frequent_item_sets" + _param_defs = { + "filter": {"type": "query"}, + } + + def __init__( + self, + *, + fields: Union[ + Sequence["types.FrequentItemSetsField"], + Sequence[Dict[str, Any]], + "DefaultType", + ] = DEFAULT, + minimum_set_size: Union[int, "DefaultType"] = DEFAULT, + minimum_support: Union[float, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + filter: Union[Query, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + fields=fields, + minimum_set_size=minimum_set_size, + minimum_support=minimum_support, + size=size, + filter=filter, + **kwargs, + ) + + +class Filter(Bucket[_R]): + """ + A single bucket aggregation that narrows the set of documents to those + that match a query. + + :arg filter: A single bucket aggregation that narrows the set of + documents to those that match a query. 
+ """ + + name = "filter" + _param_defs = { + "filter": {"type": "query"}, + "aggs": {"type": "agg", "hash": True}, + } + + def __init__(self, filter: Union[Query, "DefaultType"] = DEFAULT, **kwargs: Any): + super().__init__(filter=filter, **kwargs) + + def to_dict(self) -> Dict[str, Any]: + d = super().to_dict() + if isinstance(d[self.name], dict): + n = cast(AttrDict[Any], d[self.name]) + n.update(n.pop("filter", {})) + return d + + +class Filters(Bucket[_R]): + """ + A multi-bucket aggregation where each bucket contains the documents + that match a query. + + :arg filters: Collection of queries from which to build buckets. + :arg other_bucket: Set to `true` to add a bucket to the response which + will contain all documents that do not match any of the given + filters. + :arg other_bucket_key: The key with which the other bucket is + returned. Defaults to `_other_` if omitted. + :arg keyed: By default, the named filters aggregation returns the + buckets as an object. Set to `false` to return the buckets as an + array of objects. Defaults to `True` if omitted. + """ + + name = "filters" + _param_defs = { + "filters": {"type": "query", "hash": True}, + "aggs": {"type": "agg", "hash": True}, + } + + def __init__( + self, + *, + filters: Union[Dict[str, Query], "DefaultType"] = DEFAULT, + other_bucket: Union[bool, "DefaultType"] = DEFAULT, + other_bucket_key: Union[str, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + filters=filters, + other_bucket=other_bucket, + other_bucket_key=other_bucket_key, + keyed=keyed, + **kwargs, + ) + + +class GeoBounds(Agg[_R]): + """ + A metric aggregation that computes the geographic bounding box + containing all values for a Geopoint or Geoshape field. + + :arg wrap_longitude: Specifies whether the bounding box should be + allowed to overlap the international date line. Defaults to `True` + if omitted. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "geo_bounds" + + def __init__( + self, + *, + wrap_longitude: Union[bool, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + wrap_longitude=wrap_longitude, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class GeoCentroid(Agg[_R]): + """ + A metric aggregation that computes the weighted centroid from all + coordinate values for geo fields. + + :arg count: + :arg location: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. 
+ :arg script: + """ + + name = "geo_centroid" + + def __init__( + self, + *, + count: Union[int, "DefaultType"] = DEFAULT, + location: Union[ + "types.LatLonGeoLocation", + "types.GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + count=count, + location=location, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class GeoDistance(Bucket[_R]): + """ + A multi-bucket aggregation that works on `geo_point` fields. Evaluates + the distance of each document value from an origin point and + determines the buckets it belongs to, based on ranges defined in the + request. + + :arg distance_type: The distance calculation type. Defaults to `arc` + if omitted. + :arg field: A field of type `geo_point` used to evaluate the distance. + :arg origin: The origin used to evaluate the distance. + :arg ranges: An array of ranges used to bucket documents. + :arg unit: The distance unit. Defaults to `m` if omitted. + """ + + name = "geo_distance" + + def __init__( + self, + *, + distance_type: Union[Literal["arc", "plane"], "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + origin: Union[ + "types.LatLonGeoLocation", + "types.GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + ranges: Union[ + Sequence["types.AggregationRange"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + unit: Union[ + Literal["in", "ft", "yd", "mi", "nmi", "km", "m", "cm", "mm"], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + distance_type=distance_type, + field=field, + origin=origin, + ranges=ranges, + unit=unit, + **kwargs, + ) + + +class GeohashGrid(Bucket[_R]): + """ + A multi-bucket aggregation that groups `geo_point` and `geo_shape` + values into buckets that represent a grid. Each cell is labeled using + a geohash which is of user-definable precision. + + :arg bounds: The bounding box to filter the points in each bucket. + :arg field: Field containing indexed `geo_point` or `geo_shape` + values. If the field contains an array, `geohash_grid` aggregates + all array values. + :arg precision: The string length of the geohashes used to define + cells/buckets in the results. Defaults to `5` if omitted. + :arg shard_size: Allows for more accurate counting of the top cells + returned in the final result the aggregation. Defaults to + returning `max(10,(size x number-of-shards))` buckets from each + shard. + :arg size: The maximum number of geohash buckets to return. Defaults + to `10000` if omitted. 
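[editor's note] A sketch of the ``geo_distance`` buckets defined above; the ``places`` index, the ``location`` field and the origin coordinates are placeholders.

from elasticsearch.dsl import Search

s = Search(index="places")
s.aggs.bucket("rings_around_amsterdam", "geo_distance",
              field="location", origin="52.3760, 4.894", unit="km",
              ranges=[{"to": 100}, {"from": 100, "to": 300}, {"from": 300}])
print(s.to_dict())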
+ """ + + name = "geohash_grid" + + def __init__( + self, + *, + bounds: Union[ + "types.CoordsGeoBounds", + "types.TopLeftBottomRightGeoBounds", + "types.TopRightBottomLeftGeoBounds", + "types.WktGeoBounds", + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + precision: Union[float, str, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + bounds=bounds, + field=field, + precision=precision, + shard_size=shard_size, + size=size, + **kwargs, + ) + + +class GeoLine(Agg[_R]): + """ + Aggregates all `geo_point` values within a bucket into a `LineString` + ordered by the chosen sort field. + + :arg point: (required) The name of the geo_point field. + :arg sort: (required) The name of the numeric field to use as the sort + key for ordering the points. When the `geo_line` aggregation is + nested inside a `time_series` aggregation, this field defaults to + `@timestamp`, and any other value will result in error. + :arg include_sort: When `true`, returns an additional array of the + sort values in the feature properties. + :arg sort_order: The order in which the line is sorted (ascending or + descending). Defaults to `asc` if omitted. + :arg size: The maximum length of the line represented in the + aggregation. Valid sizes are between 1 and 10000. Defaults to + `10000` if omitted. + """ + + name = "geo_line" + + def __init__( + self, + *, + point: Union["types.GeoLinePoint", Dict[str, Any], "DefaultType"] = DEFAULT, + sort: Union["types.GeoLineSort", Dict[str, Any], "DefaultType"] = DEFAULT, + include_sort: Union[bool, "DefaultType"] = DEFAULT, + sort_order: Union[Literal["asc", "desc"], "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + point=point, + sort=sort, + include_sort=include_sort, + sort_order=sort_order, + size=size, + **kwargs, + ) + + +class GeotileGrid(Bucket[_R]): + """ + A multi-bucket aggregation that groups `geo_point` and `geo_shape` + values into buckets that represent a grid. Each cell corresponds to a + map tile as used by many online map sites. + + :arg field: Field containing indexed `geo_point` or `geo_shape` + values. If the field contains an array, `geotile_grid` aggregates + all array values. + :arg precision: Integer zoom of the key used to define cells/buckets + in the results. Values outside of the range [0,29] will be + rejected. Defaults to `7` if omitted. + :arg shard_size: Allows for more accurate counting of the top cells + returned in the final result the aggregation. Defaults to + returning `max(10,(size x number-of-shards))` buckets from each + shard. + :arg size: The maximum number of buckets to return. Defaults to + `10000` if omitted. + :arg bounds: A bounding box to filter the geo-points or geo-shapes in + each bucket. 
+ """ + + name = "geotile_grid" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + precision: Union[float, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + bounds: Union[ + "types.CoordsGeoBounds", + "types.TopLeftBottomRightGeoBounds", + "types.TopRightBottomLeftGeoBounds", + "types.WktGeoBounds", + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + precision=precision, + shard_size=shard_size, + size=size, + bounds=bounds, + **kwargs, + ) + + +class GeohexGrid(Bucket[_R]): + """ + A multi-bucket aggregation that groups `geo_point` and `geo_shape` + values into buckets that represent a grid. Each cell corresponds to a + H3 cell index and is labeled using the H3Index representation. + + :arg field: (required) Field containing indexed `geo_point` or + `geo_shape` values. If the field contains an array, `geohex_grid` + aggregates all array values. + :arg precision: Integer zoom of the key used to defined cells or + buckets in the results. Value should be between 0-15. Defaults to + `6` if omitted. + :arg bounds: Bounding box used to filter the geo-points in each + bucket. + :arg size: Maximum number of buckets to return. Defaults to `10000` if + omitted. + :arg shard_size: Number of buckets returned from each shard. + """ + + name = "geohex_grid" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + precision: Union[int, "DefaultType"] = DEFAULT, + bounds: Union[ + "types.CoordsGeoBounds", + "types.TopLeftBottomRightGeoBounds", + "types.TopRightBottomLeftGeoBounds", + "types.WktGeoBounds", + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + precision=precision, + bounds=bounds, + size=size, + shard_size=shard_size, + **kwargs, + ) + + +class Global(Bucket[_R]): + """ + Defines a single bucket of all the documents within the search + execution context. This context is defined by the indices and the + document types you’re searching on, but is not influenced by the + search query itself. + """ + + name = "global" + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + + +class Histogram(Bucket[_R]): + """ + A multi-bucket values source based aggregation that can be applied on + numeric values or numeric range values extracted from the documents. + It dynamically builds fixed size (interval) buckets over the values. + + :arg extended_bounds: Enables extending the bounds of the histogram + beyond the data itself. + :arg hard_bounds: Limits the range of buckets in the histogram. It is + particularly useful in the case of open data ranges that can + result in a very large number of buckets. + :arg field: The name of the field to aggregate on. + :arg interval: The interval for the buckets. Must be a positive + decimal. + :arg min_doc_count: Only returns buckets that have `min_doc_count` + number of documents. By default, the response will fill gaps in + the histogram with empty buckets. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg offset: By default, the bucket keys start with 0 and then + continue in even spaced steps of `interval`. The bucket boundaries + can be shifted by using the `offset` option. 
+ :arg order: The sort order of the returned buckets. By default, the + returned buckets are sorted by their key ascending. + :arg script: + :arg format: + :arg keyed: If `true`, returns buckets as a hash instead of an array, + keyed by the bucket keys. + """ + + name = "histogram" + + def __init__( + self, + *, + extended_bounds: Union[ + "types.ExtendedBounds", Dict[str, Any], "DefaultType" + ] = DEFAULT, + hard_bounds: Union[ + "types.ExtendedBounds", Dict[str, Any], "DefaultType" + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + interval: Union[float, "DefaultType"] = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + missing: Union[float, "DefaultType"] = DEFAULT, + offset: Union[float, "DefaultType"] = DEFAULT, + order: Union[ + Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]], + Sequence[Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]]], + "DefaultType", + ] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + extended_bounds=extended_bounds, + hard_bounds=hard_bounds, + field=field, + interval=interval, + min_doc_count=min_doc_count, + missing=missing, + offset=offset, + order=order, + script=script, + format=format, + keyed=keyed, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return FieldBucketData(self, search, data) + + +class IPRange(Bucket[_R]): + """ + A multi-bucket value source based aggregation that enables the user to + define a set of IP ranges - each representing a bucket. + + :arg field: The date field whose values are used to build ranges. + :arg ranges: Array of IP ranges. + """ + + name = "ip_range" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + ranges: Union[ + Sequence["types.IpRangeAggregationRange"], + Sequence[Dict[str, Any]], + "DefaultType", + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__(field=field, ranges=ranges, **kwargs) + + +class IPPrefix(Bucket[_R]): + """ + A bucket aggregation that groups documents based on the network or + sub-network of an IP address. + + :arg field: (required) The IP address field to aggregation on. The + field mapping type must be `ip`. + :arg prefix_length: (required) Length of the network prefix. For IPv4 + addresses the accepted range is [0, 32]. For IPv6 addresses the + accepted range is [0, 128]. + :arg is_ipv6: Defines whether the prefix applies to IPv6 addresses. + :arg append_prefix_length: Defines whether the prefix length is + appended to IP address keys in the response. + :arg keyed: Defines whether buckets are returned as a hash rather than + an array in the response. + :arg min_doc_count: Minimum number of documents in a bucket for it to + be included in the response. Defaults to `1` if omitted. 
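[editor's note] Sketches for the numeric ``histogram`` and ``ip_range`` buckets defined above; fields, indices and boundaries are made up.

from elasticsearch.dsl import Search

s = Search(index="products")
s.aggs.bucket("price_histogram", "histogram",
              field="price", interval=50, min_doc_count=1)

s2 = Search(index="weblogs")
s2.aggs.bucket("client_ranges", "ip_range", field="client_ip",
               ranges=[{"to": "10.0.0.128"}, {"from": "10.0.0.128"}])
print(s.to_dict())
print(s2.to_dict())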
+ """ + + name = "ip_prefix" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + prefix_length: Union[int, "DefaultType"] = DEFAULT, + is_ipv6: Union[bool, "DefaultType"] = DEFAULT, + append_prefix_length: Union[bool, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + prefix_length=prefix_length, + is_ipv6=is_ipv6, + append_prefix_length=append_prefix_length, + keyed=keyed, + min_doc_count=min_doc_count, + **kwargs, + ) + + +class Inference(Pipeline[_R]): + """ + A parent pipeline aggregation which loads a pre-trained model and + performs inference on the collated result fields from the parent + bucket aggregation. + + :arg model_id: (required) The ID or alias for the trained model. + :arg inference_config: Contains the inference type and its options. + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "inference" + + def __init__( + self, + *, + model_id: Union[str, "DefaultType"] = DEFAULT, + inference_config: Union[ + "types.InferenceConfigContainer", Dict[str, Any], "DefaultType" + ] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + model_id=model_id, + inference_config=inference_config, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class Line(Agg[_R]): + """ + :arg point: (required) The name of the geo_point field. + :arg sort: (required) The name of the numeric field to use as the sort + key for ordering the points. When the `geo_line` aggregation is + nested inside a `time_series` aggregation, this field defaults to + `@timestamp`, and any other value will result in error. + :arg include_sort: When `true`, returns an additional array of the + sort values in the feature properties. + :arg sort_order: The order in which the line is sorted (ascending or + descending). Defaults to `asc` if omitted. + :arg size: The maximum length of the line represented in the + aggregation. Valid sizes are between 1 and 10000. Defaults to + `10000` if omitted. + """ + + name = "line" + + def __init__( + self, + *, + point: Union["types.GeoLinePoint", Dict[str, Any], "DefaultType"] = DEFAULT, + sort: Union["types.GeoLineSort", Dict[str, Any], "DefaultType"] = DEFAULT, + include_sort: Union[bool, "DefaultType"] = DEFAULT, + sort_order: Union[Literal["asc", "desc"], "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + point=point, + sort=sort, + include_sort=include_sort, + sort_order=sort_order, + size=size, + **kwargs, + ) + + +class MatrixStats(Agg[_R]): + """ + A numeric aggregation that computes the following statistics over a + set of document fields: `count`, `mean`, `variance`, `skewness`, + `kurtosis`, `covariance`, and `covariance`. + + :arg mode: Array value the aggregation will use for array or multi- + valued fields. Defaults to `avg` if omitted. 
+ :arg fields: An array of fields for computing the statistics. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + """ + + name = "matrix_stats" + + def __init__( + self, + *, + mode: Union[ + Literal["min", "max", "sum", "avg", "median"], "DefaultType" + ] = DEFAULT, + fields: Union[ + Union[str, "InstrumentedField"], + Sequence[Union[str, "InstrumentedField"]], + "DefaultType", + ] = DEFAULT, + missing: Union[ + Mapping[Union[str, "InstrumentedField"], float], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__(mode=mode, fields=fields, missing=missing, **kwargs) + + +class Max(Agg[_R]): + """ + A single-value metrics aggregation that returns the maximum value + among the numeric values extracted from the aggregated documents. + + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "max" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, field=field, missing=missing, script=script, **kwargs + ) + + +class MaxBucket(Pipeline[_R]): + """ + A sibling pipeline aggregation which identifies the bucket(s) with the + maximum value of a specified metric in a sibling aggregation and + outputs both the value and the key(s) of the bucket(s). + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "max_bucket" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class MedianAbsoluteDeviation(Agg[_R]): + """ + A single-value aggregation that approximates the median absolute + deviation of its search results. + + :arg compression: Limits the maximum number of nodes used by the + underlying TDigest algorithm to `20 * compression`, enabling + control of memory usage and approximation error. Defaults to + `1000` if omitted. + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. 
+ :arg script: + """ + + name = "median_absolute_deviation" + + def __init__( + self, + *, + compression: Union[float, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + compression=compression, + format=format, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class Min(Agg[_R]): + """ + A single-value metrics aggregation that returns the minimum value + among numeric values extracted from the aggregated documents. + + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "min" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, field=field, missing=missing, script=script, **kwargs + ) + + +class MinBucket(Pipeline[_R]): + """ + A sibling pipeline aggregation which identifies the bucket(s) with the + minimum value of a specified metric in a sibling aggregation and + outputs both the value and the key(s) of the bucket(s). + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "min_bucket" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class Missing(Bucket[_R]): + """ + A field data based single bucket aggregation, that creates a bucket of + all documents in the current document set context that are missing a + field value (effectively, missing a field or having the configured + NULL value set). + + :arg field: The name of the field. + :arg missing: + """ + + name = "missing" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(field=field, missing=missing, **kwargs) + + +class MovingAvg(Pipeline[_R]): + """ """ + + name = "moving_avg" + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + + +class LinearMovingAverageAggregation(MovingAvg[_R]): + """ + :arg model: (required) + :arg settings: (required) + :arg minimize: + :arg predict: + :arg window: + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. 
+ :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + def __init__( + self, + *, + model: Any = DEFAULT, + settings: Union["types.EmptyObject", Dict[str, Any], "DefaultType"] = DEFAULT, + minimize: Union[bool, "DefaultType"] = DEFAULT, + predict: Union[int, "DefaultType"] = DEFAULT, + window: Union[int, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + model=model, + settings=settings, + minimize=minimize, + predict=predict, + window=window, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class SimpleMovingAverageAggregation(MovingAvg[_R]): + """ + :arg model: (required) + :arg settings: (required) + :arg minimize: + :arg predict: + :arg window: + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + def __init__( + self, + *, + model: Any = DEFAULT, + settings: Union["types.EmptyObject", Dict[str, Any], "DefaultType"] = DEFAULT, + minimize: Union[bool, "DefaultType"] = DEFAULT, + predict: Union[int, "DefaultType"] = DEFAULT, + window: Union[int, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + model=model, + settings=settings, + minimize=minimize, + predict=predict, + window=window, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class EwmaMovingAverageAggregation(MovingAvg[_R]): + """ + :arg model: (required) + :arg settings: (required) + :arg minimize: + :arg predict: + :arg window: + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. 
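+
+    A minimal usage sketch. The `alpha` setting and the `the_avg` bucket
+    referenced by `buckets_path` are illustrative assumptions::
+
+        from elasticsearch.dsl import aggs
+
+        smoothed = aggs.EwmaMovingAverageAggregation(
+            model="ewma",
+            settings={"alpha": 0.3},
+            window=10,
+            buckets_path="the_avg",
+        )
+        smoothed.to_dict()  # {"moving_avg": {"model": "ewma", ...}}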
+ """ + + def __init__( + self, + *, + model: Any = DEFAULT, + settings: Union[ + "types.EwmaModelSettings", Dict[str, Any], "DefaultType" + ] = DEFAULT, + minimize: Union[bool, "DefaultType"] = DEFAULT, + predict: Union[int, "DefaultType"] = DEFAULT, + window: Union[int, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + model=model, + settings=settings, + minimize=minimize, + predict=predict, + window=window, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class HoltMovingAverageAggregation(MovingAvg[_R]): + """ + :arg model: (required) + :arg settings: (required) + :arg minimize: + :arg predict: + :arg window: + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + def __init__( + self, + *, + model: Any = DEFAULT, + settings: Union[ + "types.HoltLinearModelSettings", Dict[str, Any], "DefaultType" + ] = DEFAULT, + minimize: Union[bool, "DefaultType"] = DEFAULT, + predict: Union[int, "DefaultType"] = DEFAULT, + window: Union[int, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + model=model, + settings=settings, + minimize=minimize, + predict=predict, + window=window, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class HoltWintersMovingAverageAggregation(MovingAvg[_R]): + """ + :arg model: (required) + :arg settings: (required) + :arg minimize: + :arg predict: + :arg window: + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + def __init__( + self, + *, + model: Any = DEFAULT, + settings: Union[ + "types.HoltWintersModelSettings", Dict[str, Any], "DefaultType" + ] = DEFAULT, + minimize: Union[bool, "DefaultType"] = DEFAULT, + predict: Union[int, "DefaultType"] = DEFAULT, + window: Union[int, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + model=model, + settings=settings, + minimize=minimize, + predict=predict, + window=window, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class MovingPercentiles(Pipeline[_R]): + """ + Given an ordered series of percentiles, "slides" a window across those + percentiles and computes cumulative percentiles. 
+ + :arg window: The size of window to "slide" across the histogram. + :arg shift: By default, the window consists of the last n values + excluding the current bucket. Increasing `shift` by 1, moves the + starting window position by 1 to the right. + :arg keyed: + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "moving_percentiles" + + def __init__( + self, + *, + window: Union[int, "DefaultType"] = DEFAULT, + shift: Union[int, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + window=window, + shift=shift, + keyed=keyed, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class MovingFn(Pipeline[_R]): + """ + Given an ordered series of data, "slides" a window across the data and + runs a custom script on each window of data. For convenience, a number + of common functions are predefined such as `min`, `max`, and moving + averages. + + :arg script: The script that should be executed on each window of + data. + :arg shift: By default, the window consists of the last n values + excluding the current bucket. Increasing `shift` by 1, moves the + starting window position by 1 to the right. + :arg window: The size of window to "slide" across the histogram. + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "moving_fn" + + def __init__( + self, + *, + script: Union[str, "DefaultType"] = DEFAULT, + shift: Union[int, "DefaultType"] = DEFAULT, + window: Union[int, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + script=script, + shift=shift, + window=window, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class MultiTerms(Bucket[_R]): + """ + A multi-bucket value source based aggregation where buckets are + dynamically built - one per unique set of values. + + :arg terms: (required) The field from which to generate sets of terms. + :arg collect_mode: Specifies the strategy for data collection. + Defaults to `breadth_first` if omitted. + :arg order: Specifies the sort order of the buckets. Defaults to + sorting by descending document count. + :arg min_doc_count: The minimum number of documents in a bucket for it + to be returned. Defaults to `1` if omitted. + :arg shard_min_doc_count: The minimum number of documents in a bucket + on each shard for it to be returned. Defaults to `1` if omitted. 
+ :arg shard_size: The number of candidate terms produced by each shard. + By default, `shard_size` will be automatically estimated based on + the number of shards and the `size` parameter. + :arg show_term_doc_count_error: Calculates the doc count error on per + term basis. + :arg size: The number of term buckets should be returned out of the + overall terms list. Defaults to `10` if omitted. + """ + + name = "multi_terms" + + def __init__( + self, + *, + terms: Union[ + Sequence["types.MultiTermLookup"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + collect_mode: Union[ + Literal["depth_first", "breadth_first"], "DefaultType" + ] = DEFAULT, + order: Union[ + Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]], + Sequence[Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]]], + "DefaultType", + ] = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + show_term_doc_count_error: Union[bool, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + terms=terms, + collect_mode=collect_mode, + order=order, + min_doc_count=min_doc_count, + shard_min_doc_count=shard_min_doc_count, + shard_size=shard_size, + show_term_doc_count_error=show_term_doc_count_error, + size=size, + **kwargs, + ) + + +class Nested(Bucket[_R]): + """ + A special single bucket aggregation that enables aggregating nested + documents. + + :arg path: The path to the field of type `nested`. + """ + + name = "nested" + + def __init__( + self, + path: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(path=path, **kwargs) + + +class Normalize(Pipeline[_R]): + """ + A parent pipeline aggregation which calculates the specific + normalized/rescaled value for a specific bucket value. + + :arg method: The specific method to apply. + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "normalize" + + def __init__( + self, + *, + method: Union[ + Literal[ + "rescale_0_1", + "rescale_0_100", + "percent_of_sum", + "mean", + "z-score", + "softmax", + ], + "DefaultType", + ] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + method=method, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class Parent(Bucket[_R]): + """ + A special single bucket aggregation that selects parent documents that + have the specified type, as defined in a `join` field. + + :arg type: The child type that should be selected. + """ + + name = "parent" + + def __init__(self, type: Union[str, "DefaultType"] = DEFAULT, **kwargs: Any): + super().__init__(type=type, **kwargs) + + +class PercentileRanks(Agg[_R]): + """ + A multi-value metrics aggregation that calculates one or more + percentile ranks over numeric values extracted from the aggregated + documents. 
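+
+    A usage sketch; the index and field names are illustrative
+    assumptions, and the arguments are described below::
+
+        from elasticsearch.dsl import Search
+
+        s = Search(index="website-logs")
+        s.aggs.metric(
+            "load_time_ranks", "percentile_ranks", field="load_time", values=[500, 600]
+        )
+        response = s.execute()  # assumes a default connection is configured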
+ + :arg keyed: By default, the aggregation associates a unique string key + with each bucket and returns the ranges as a hash rather than an + array. Set to `false` to disable this behavior. Defaults to `True` + if omitted. + :arg values: An array of values for which to calculate the percentile + ranks. + :arg hdr: Uses the alternative High Dynamic Range Histogram algorithm + to calculate percentile ranks. + :arg tdigest: Sets parameters for the default TDigest algorithm used + to calculate percentile ranks. + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "percentile_ranks" + + def __init__( + self, + *, + keyed: Union[bool, "DefaultType"] = DEFAULT, + values: Union[Sequence[float], None, "DefaultType"] = DEFAULT, + hdr: Union["types.HdrMethod", Dict[str, Any], "DefaultType"] = DEFAULT, + tdigest: Union["types.TDigest", Dict[str, Any], "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + keyed=keyed, + values=values, + hdr=hdr, + tdigest=tdigest, + format=format, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class Percentiles(Agg[_R]): + """ + A multi-value metrics aggregation that calculates one or more + percentiles over numeric values extracted from the aggregated + documents. + + :arg keyed: By default, the aggregation associates a unique string key + with each bucket and returns the ranges as a hash rather than an + array. Set to `false` to disable this behavior. Defaults to `True` + if omitted. + :arg percents: The percentiles to calculate. + :arg hdr: Uses the alternative High Dynamic Range Histogram algorithm + to calculate percentiles. + :arg tdigest: Sets parameters for the default TDigest algorithm used + to calculate percentiles. + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "percentiles" + + def __init__( + self, + *, + keyed: Union[bool, "DefaultType"] = DEFAULT, + percents: Union[Sequence[float], "DefaultType"] = DEFAULT, + hdr: Union["types.HdrMethod", Dict[str, Any], "DefaultType"] = DEFAULT, + tdigest: Union["types.TDigest", Dict[str, Any], "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + keyed=keyed, + percents=percents, + hdr=hdr, + tdigest=tdigest, + format=format, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class PercentilesBucket(Pipeline[_R]): + """ + A sibling pipeline aggregation which calculates percentiles across all + bucket of a specified metric in a sibling aggregation. + + :arg percents: The list of percentiles to calculate. + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. 
+ :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "percentiles_bucket" + + def __init__( + self, + *, + percents: Union[Sequence[float], "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + percents=percents, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class Range(Bucket[_R]): + """ + A multi-bucket value source based aggregation that enables the user to + define a set of ranges - each representing a bucket. + + :arg field: The date field whose values are use to build ranges. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg ranges: An array of ranges used to bucket documents. + :arg script: + :arg keyed: Set to `true` to associate a unique string key with each + bucket and return the ranges as a hash rather than an array. + :arg format: + """ + + name = "range" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[int, "DefaultType"] = DEFAULT, + ranges: Union[ + Sequence["types.AggregationRange"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + missing=missing, + ranges=ranges, + script=script, + keyed=keyed, + format=format, + **kwargs, + ) + + +class RareTerms(Bucket[_R]): + """ + A multi-bucket value source based aggregation which finds "rare" + terms — terms that are at the long-tail of the distribution and are + not frequent. + + :arg exclude: Terms that should be excluded from the aggregation. + :arg field: The field from which to return rare terms. + :arg include: Terms that should be included in the aggregation. + :arg max_doc_count: The maximum number of documents a term should + appear in. Defaults to `1` if omitted. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg precision: The precision of the internal CuckooFilters. Smaller + precision leads to better approximation, but higher memory usage. + Defaults to `0.001` if omitted. + :arg value_type: + """ + + name = "rare_terms" + + def __init__( + self, + *, + exclude: Union[str, Sequence[str], "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + include: Union[ + str, Sequence[str], "types.TermsPartition", Dict[str, Any], "DefaultType" + ] = DEFAULT, + max_doc_count: Union[int, "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + precision: Union[float, "DefaultType"] = DEFAULT, + value_type: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + exclude=exclude, + field=field, + include=include, + max_doc_count=max_doc_count, + missing=missing, + precision=precision, + value_type=value_type, + **kwargs, + ) + + +class Rate(Agg[_R]): + """ + Calculates a rate of documents or a field in each bucket. 
Can only be + used inside a `date_histogram` or `composite` aggregation. + + :arg unit: The interval used to calculate the rate. By default, the + interval of the `date_histogram` is used. + :arg mode: How the rate is calculated. Defaults to `sum` if omitted. + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "rate" + + def __init__( + self, + *, + unit: Union[ + Literal[ + "second", "minute", "hour", "day", "week", "month", "quarter", "year" + ], + "DefaultType", + ] = DEFAULT, + mode: Union[Literal["sum", "value_count"], "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + unit=unit, + mode=mode, + format=format, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class ReverseNested(Bucket[_R]): + """ + A special single bucket aggregation that enables aggregating on parent + documents from nested documents. Should only be defined inside a + `nested` aggregation. + + :arg path: Defines the nested object field that should be joined back + to. The default is empty, which means that it joins back to the + root/main document level. + """ + + name = "reverse_nested" + + def __init__( + self, + path: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(path=path, **kwargs) + + +class RandomSampler(Bucket[_R]): + """ + A single bucket aggregation that randomly includes documents in the + aggregated results. Sampling provides significant speed improvement at + the cost of accuracy. + + :arg probability: (required) The probability that a document will be + included in the aggregated data. Must be greater than 0, less than + 0.5, or exactly 1. The lower the probability, the fewer documents + are matched. + :arg seed: The seed to generate the random sampling of documents. When + a seed is provided, the random subset of documents is the same + between calls. + :arg shard_seed: When combined with seed, setting shard_seed ensures + 100% consistent sampling over shards where data is exactly the + same. + """ + + name = "random_sampler" + + def __init__( + self, + *, + probability: Union[float, "DefaultType"] = DEFAULT, + seed: Union[int, "DefaultType"] = DEFAULT, + shard_seed: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + probability=probability, seed=seed, shard_seed=shard_seed, **kwargs + ) + + +class Sampler(Bucket[_R]): + """ + A filtering aggregation used to limit any sub aggregations' processing + to a sample of the top-scoring documents. + + :arg shard_size: Limits how many top-scoring documents are collected + in the sample processed on each shard. Defaults to `100` if + omitted. + """ + + name = "sampler" + + def __init__(self, shard_size: Union[int, "DefaultType"] = DEFAULT, **kwargs: Any): + super().__init__(shard_size=shard_size, **kwargs) + + +class ScriptedMetric(Agg[_R]): + """ + A metric aggregation that uses scripts to provide a metric output. + + :arg combine_script: Runs once on each shard after document collection + is complete. Allows the aggregation to consolidate the state + returned from each shard. 
+ :arg init_script: Runs prior to any collection of documents. Allows + the aggregation to set up any initial state. + :arg map_script: Run once per document collected. If no + `combine_script` is specified, the resulting state needs to be + stored in the `state` object. + :arg params: A global object with script parameters for `init`, `map` + and `combine` scripts. It is shared between the scripts. + :arg reduce_script: Runs once on the coordinating node after all + shards have returned their results. The script is provided with + access to a variable `states`, which is an array of the result of + the `combine_script` on each shard. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "scripted_metric" + + def __init__( + self, + *, + combine_script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + init_script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + map_script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + params: Union[Mapping[str, Any], "DefaultType"] = DEFAULT, + reduce_script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + combine_script=combine_script, + init_script=init_script, + map_script=map_script, + params=params, + reduce_script=reduce_script, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class SerialDiff(Pipeline[_R]): + """ + An aggregation that subtracts values in a time series from themselves + at different time lags or periods. + + :arg lag: The historical bucket to subtract from the current value. + Must be a positive, non-zero integer. + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "serial_diff" + + def __init__( + self, + *, + lag: Union[int, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + lag=lag, + format=format, + gap_policy=gap_policy, + buckets_path=buckets_path, + **kwargs, + ) + + +class SignificantTerms(Bucket[_R]): + """ + Returns interesting or unusual occurrences of terms in a set. + + :arg background_filter: A background filter that can be used to focus + in on significant terms within a narrower context, instead of the + entire index. + :arg chi_square: Use Chi square, as described in "Information + Retrieval", Manning et al., Chapter 13.5.2, as the significance + score. + :arg exclude: Terms to exclude. + :arg execution_hint: Mechanism by which the aggregation should be + executed: using field values directly or using global ordinals. + :arg field: The field from which to return significant terms. 
+ :arg gnd: Use Google normalized distance as described in "The Google + Similarity Distance", Cilibrasi and Vitanyi, 2007, as the + significance score. + :arg include: Terms to include. + :arg jlh: Use JLH score as the significance score. + :arg min_doc_count: Only return terms that are found in more than + `min_doc_count` hits. Defaults to `3` if omitted. + :arg mutual_information: Use mutual information as described in + "Information Retrieval", Manning et al., Chapter 13.5.1, as the + significance score. + :arg percentage: A simple calculation of the number of documents in + the foreground sample with a term divided by the number of + documents in the background with the term. + :arg script_heuristic: Customized score, implemented via a script. + :arg shard_min_doc_count: Regulates the certainty a shard has if the + term should actually be added to the candidate list or not with + respect to the `min_doc_count`. Terms will only be considered if + their local shard frequency within the set is higher than the + `shard_min_doc_count`. + :arg shard_size: Can be used to control the volumes of candidate terms + produced by each shard. By default, `shard_size` will be + automatically estimated based on the number of shards and the + `size` parameter. + :arg size: The number of buckets returned out of the overall terms + list. + """ + + name = "significant_terms" + _param_defs = { + "background_filter": {"type": "query"}, + } + + def __init__( + self, + *, + background_filter: Union[Query, "DefaultType"] = DEFAULT, + chi_square: Union[ + "types.ChiSquareHeuristic", Dict[str, Any], "DefaultType" + ] = DEFAULT, + exclude: Union[str, Sequence[str], "DefaultType"] = DEFAULT, + execution_hint: Union[ + Literal[ + "map", + "global_ordinals", + "global_ordinals_hash", + "global_ordinals_low_cardinality", + ], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + gnd: Union[ + "types.GoogleNormalizedDistanceHeuristic", Dict[str, Any], "DefaultType" + ] = DEFAULT, + include: Union[ + str, Sequence[str], "types.TermsPartition", Dict[str, Any], "DefaultType" + ] = DEFAULT, + jlh: Union["types.EmptyObject", Dict[str, Any], "DefaultType"] = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + mutual_information: Union[ + "types.MutualInformationHeuristic", Dict[str, Any], "DefaultType" + ] = DEFAULT, + percentage: Union[ + "types.PercentageScoreHeuristic", Dict[str, Any], "DefaultType" + ] = DEFAULT, + script_heuristic: Union[ + "types.ScriptedHeuristic", Dict[str, Any], "DefaultType" + ] = DEFAULT, + shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + background_filter=background_filter, + chi_square=chi_square, + exclude=exclude, + execution_hint=execution_hint, + field=field, + gnd=gnd, + include=include, + jlh=jlh, + min_doc_count=min_doc_count, + mutual_information=mutual_information, + percentage=percentage, + script_heuristic=script_heuristic, + shard_min_doc_count=shard_min_doc_count, + shard_size=shard_size, + size=size, + **kwargs, + ) + + +class SignificantText(Bucket[_R]): + """ + Returns interesting or unusual occurrences of free-text terms in a + set. + + :arg background_filter: A background filter that can be used to focus + in on significant terms within a narrower context, instead of the + entire index. 
+    :arg chi_square: Use Chi square, as described in "Information
+        Retrieval", Manning et al., Chapter 13.5.2, as the significance
+        score.
+    :arg exclude: Values to exclude.
+    :arg execution_hint: Determines whether the aggregation will use field
+        values directly or global ordinals.
+    :arg field: The field from which to return significant text.
+    :arg filter_duplicate_text: Whether to filter out duplicate text to
+        deal with noisy data.
+    :arg gnd: Use Google normalized distance as described in "The Google
+        Similarity Distance", Cilibrasi and Vitanyi, 2007, as the
+        significance score.
+    :arg include: Values to include.
+    :arg jlh: Use JLH score as the significance score.
+    :arg min_doc_count: Only return values that are found in more than
+        `min_doc_count` hits. Defaults to `3` if omitted.
+    :arg mutual_information: Use mutual information as described in
+        "Information Retrieval", Manning et al., Chapter 13.5.1, as the
+        significance score.
+    :arg percentage: A simple calculation of the number of documents in
+        the foreground sample with a term divided by the number of
+        documents in the background with the term.
+    :arg script_heuristic: Customized score, implemented via a script.
+    :arg shard_min_doc_count: Regulates the certainty a shard has, with
+        respect to `min_doc_count`, before a value is added to the
+        candidate list. Values will only be considered if their local
+        shard frequency within the set is higher than the
+        `shard_min_doc_count`.
+    :arg shard_size: The number of candidate terms produced by each shard.
+        By default, `shard_size` will be automatically estimated based on
+        the number of shards and the `size` parameter.
+    :arg size: The number of buckets returned out of the overall terms
+        list.
+    :arg source_fields: Overrides the JSON `_source` fields from which
+        text will be analyzed.
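+
+    A usage sketch; the `news` index and `content` field are illustrative
+    assumptions::
+
+        from elasticsearch.dsl import Search
+
+        s = Search(index="news")
+        s = s.query("match", content="bird flu")
+        s.aggs.bucket(
+            "keywords", "significant_text", field="content", filter_duplicate_text=True
+        )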
+ """ + + name = "significant_text" + _param_defs = { + "background_filter": {"type": "query"}, + } + + def __init__( + self, + *, + background_filter: Union[Query, "DefaultType"] = DEFAULT, + chi_square: Union[ + "types.ChiSquareHeuristic", Dict[str, Any], "DefaultType" + ] = DEFAULT, + exclude: Union[str, Sequence[str], "DefaultType"] = DEFAULT, + execution_hint: Union[ + Literal[ + "map", + "global_ordinals", + "global_ordinals_hash", + "global_ordinals_low_cardinality", + ], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + filter_duplicate_text: Union[bool, "DefaultType"] = DEFAULT, + gnd: Union[ + "types.GoogleNormalizedDistanceHeuristic", Dict[str, Any], "DefaultType" + ] = DEFAULT, + include: Union[ + str, Sequence[str], "types.TermsPartition", Dict[str, Any], "DefaultType" + ] = DEFAULT, + jlh: Union["types.EmptyObject", Dict[str, Any], "DefaultType"] = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + mutual_information: Union[ + "types.MutualInformationHeuristic", Dict[str, Any], "DefaultType" + ] = DEFAULT, + percentage: Union[ + "types.PercentageScoreHeuristic", Dict[str, Any], "DefaultType" + ] = DEFAULT, + script_heuristic: Union[ + "types.ScriptedHeuristic", Dict[str, Any], "DefaultType" + ] = DEFAULT, + shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + source_fields: Union[ + Union[str, "InstrumentedField"], + Sequence[Union[str, "InstrumentedField"]], + "DefaultType", + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + background_filter=background_filter, + chi_square=chi_square, + exclude=exclude, + execution_hint=execution_hint, + field=field, + filter_duplicate_text=filter_duplicate_text, + gnd=gnd, + include=include, + jlh=jlh, + min_doc_count=min_doc_count, + mutual_information=mutual_information, + percentage=percentage, + script_heuristic=script_heuristic, + shard_min_doc_count=shard_min_doc_count, + shard_size=shard_size, + size=size, + source_fields=source_fields, + **kwargs, + ) + + +class Stats(Agg[_R]): + """ + A multi-value metrics aggregation that computes stats over numeric + values extracted from the aggregated documents. + + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "stats" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, field=field, missing=missing, script=script, **kwargs + ) + + +class StatsBucket(Pipeline[_R]): + """ + A sibling pipeline aggregation which calculates a variety of stats + across all bucket of a specified metric in a sibling aggregation. + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. 
+ """ + + name = "stats_bucket" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class StringStats(Agg[_R]): + """ + A multi-value metrics aggregation that computes statistics over string + values extracted from the aggregated documents. + + :arg show_distribution: Shows the probability distribution for all + characters. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "string_stats" + + def __init__( + self, + *, + show_distribution: Union[bool, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + show_distribution=show_distribution, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class Sum(Agg[_R]): + """ + A single-value metrics aggregation that sums numeric values that are + extracted from the aggregated documents. + + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "sum" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, field=field, missing=missing, script=script, **kwargs + ) + + +class SumBucket(Pipeline[_R]): + """ + A sibling pipeline aggregation which calculates the sum of a specified + metric across all buckets in a sibling aggregation. + + :arg format: `DecimalFormat` pattern for the output value. If + specified, the formatted value is returned in the aggregation’s + `value_as_string` property. + :arg gap_policy: Policy to apply when gaps are found in the data. + Defaults to `skip` if omitted. + :arg buckets_path: Path to the buckets that contain one set of values + to correlate. + """ + + name = "sum_bucket" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + gap_policy: Union[ + Literal["skip", "insert_zeros", "keep_values"], "DefaultType" + ] = DEFAULT, + buckets_path: Union[ + str, Sequence[str], Mapping[str, str], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs + ) + + +class Terms(Bucket[_R]): + """ + A multi-bucket value source based aggregation where buckets are + dynamically built - one per unique value. + + :arg collect_mode: Determines how child aggregations should be + calculated: breadth-first or depth-first. + :arg exclude: Values to exclude. Accepts regular expressions and + partitions. + :arg execution_hint: Determines whether the aggregation will use field + values directly or global ordinals. 
+ :arg field: The field from which to return terms. + :arg include: Values to include. Accepts regular expressions and + partitions. + :arg min_doc_count: Only return values that are found in more than + `min_doc_count` hits. Defaults to `1` if omitted. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg missing_order: + :arg missing_bucket: + :arg value_type: Coerced unmapped fields into the specified type. + :arg order: Specifies the sort order of the buckets. Defaults to + sorting by descending document count. + :arg script: + :arg shard_min_doc_count: Regulates the certainty a shard has if the + term should actually be added to the candidate list or not with + respect to the `min_doc_count`. Terms will only be considered if + their local shard frequency within the set is higher than the + `shard_min_doc_count`. + :arg shard_size: The number of candidate terms produced by each shard. + By default, `shard_size` will be automatically estimated based on + the number of shards and the `size` parameter. + :arg show_term_doc_count_error: Set to `true` to return the + `doc_count_error_upper_bound`, which is an upper bound to the + error on the `doc_count` returned by each shard. + :arg size: The number of buckets returned out of the overall terms + list. Defaults to `10` if omitted. + :arg format: + """ + + name = "terms" + + def __init__( + self, + *, + collect_mode: Union[ + Literal["depth_first", "breadth_first"], "DefaultType" + ] = DEFAULT, + exclude: Union[str, Sequence[str], "DefaultType"] = DEFAULT, + execution_hint: Union[ + Literal[ + "map", + "global_ordinals", + "global_ordinals_hash", + "global_ordinals_low_cardinality", + ], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + include: Union[ + str, Sequence[str], "types.TermsPartition", Dict[str, Any], "DefaultType" + ] = DEFAULT, + min_doc_count: Union[int, "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + missing_order: Union[ + Literal["first", "last", "default"], "DefaultType" + ] = DEFAULT, + missing_bucket: Union[bool, "DefaultType"] = DEFAULT, + value_type: Union[str, "DefaultType"] = DEFAULT, + order: Union[ + Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]], + Sequence[Mapping[Union[str, "InstrumentedField"], Literal["asc", "desc"]]], + "DefaultType", + ] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + show_term_doc_count_error: Union[bool, "DefaultType"] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + format: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + collect_mode=collect_mode, + exclude=exclude, + execution_hint=execution_hint, + field=field, + include=include, + min_doc_count=min_doc_count, + missing=missing, + missing_order=missing_order, + missing_bucket=missing_bucket, + value_type=value_type, + order=order, + script=script, + shard_min_doc_count=shard_min_doc_count, + shard_size=shard_size, + show_term_doc_count_error=show_term_doc_count_error, + size=size, + format=format, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return FieldBucketData(self, search, data) + + +class TimeSeries(Bucket[_R]): + """ + The time series aggregation queries data created using a time series + index. 
This is typically data such as metrics or other data streams + with a time component, and requires creating an index using the time + series mode. + + :arg size: The maximum number of results to return. Defaults to + `10000` if omitted. + :arg keyed: Set to `true` to associate a unique string key with each + bucket and returns the ranges as a hash rather than an array. + """ + + name = "time_series" + + def __init__( + self, + *, + size: Union[int, "DefaultType"] = DEFAULT, + keyed: Union[bool, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(size=size, keyed=keyed, **kwargs) + + +class TopHits(Agg[_R]): + """ + A metric aggregation that returns the top matching documents per + bucket. + + :arg docvalue_fields: Fields for which to return doc values. + :arg explain: If `true`, returns detailed information about score + computation as part of a hit. + :arg fields: Array of wildcard (*) patterns. The request returns + values for field names matching these patterns in the hits.fields + property of the response. + :arg from: Starting document offset. + :arg highlight: Specifies the highlighter to use for retrieving + highlighted snippets from one or more fields in the search + results. + :arg script_fields: Returns the result of one or more script + evaluations for each hit. + :arg size: The maximum number of top matching hits to return per + bucket. Defaults to `3` if omitted. + :arg sort: Sort order of the top matching hits. By default, the hits + are sorted by the score of the main query. + :arg _source: Selects the fields of the source that are returned. + :arg stored_fields: Returns values for the specified stored fields + (fields that use the `store` mapping option). + :arg track_scores: If `true`, calculates and returns document scores, + even if the scores are not used for sorting. + :arg version: If `true`, returns document version as part of a hit. + :arg seq_no_primary_term: If `true`, returns sequence number and + primary term of the last modification of each hit. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. 
+ :arg script: + """ + + name = "top_hits" + + def __init__( + self, + *, + docvalue_fields: Union[ + Sequence["types.FieldAndFormat"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + explain: Union[bool, "DefaultType"] = DEFAULT, + fields: Union[ + Sequence["types.FieldAndFormat"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + from_: Union[int, "DefaultType"] = DEFAULT, + highlight: Union["types.Highlight", Dict[str, Any], "DefaultType"] = DEFAULT, + script_fields: Union[ + Mapping[str, "types.ScriptField"], Dict[str, Any], "DefaultType" + ] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + sort: Union[ + Union[Union[str, "InstrumentedField"], "types.SortOptions"], + Sequence[Union[Union[str, "InstrumentedField"], "types.SortOptions"]], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + _source: Union[ + bool, "types.SourceFilter", Dict[str, Any], "DefaultType" + ] = DEFAULT, + stored_fields: Union[ + Union[str, "InstrumentedField"], + Sequence[Union[str, "InstrumentedField"]], + "DefaultType", + ] = DEFAULT, + track_scores: Union[bool, "DefaultType"] = DEFAULT, + version: Union[bool, "DefaultType"] = DEFAULT, + seq_no_primary_term: Union[bool, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + docvalue_fields=docvalue_fields, + explain=explain, + fields=fields, + from_=from_, + highlight=highlight, + script_fields=script_fields, + size=size, + sort=sort, + _source=_source, + stored_fields=stored_fields, + track_scores=track_scores, + version=version, + seq_no_primary_term=seq_no_primary_term, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return TopHitsData(self, search, data) + + +class TTest(Agg[_R]): + """ + A metrics aggregation that performs a statistical hypothesis test in + which the test statistic follows a Student’s t-distribution under the + null hypothesis on numeric values extracted from the aggregated + documents. + + :arg a: Test population A. + :arg b: Test population B. + :arg type: The type of test. Defaults to `heteroscedastic` if omitted. + """ + + name = "t_test" + + def __init__( + self, + *, + a: Union["types.TestPopulation", Dict[str, Any], "DefaultType"] = DEFAULT, + b: Union["types.TestPopulation", Dict[str, Any], "DefaultType"] = DEFAULT, + type: Union[ + Literal["paired", "homoscedastic", "heteroscedastic"], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__(a=a, b=b, type=type, **kwargs) + + +class TopMetrics(Agg[_R]): + """ + A metric aggregation that selects metrics from the document with the + largest or smallest sort value. + + :arg metrics: The fields of the top document to return. + :arg size: The number of top documents from which to return metrics. + Defaults to `1` if omitted. + :arg sort: The sort order of the documents. + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. 
+ :arg script: + """ + + name = "top_metrics" + + def __init__( + self, + *, + metrics: Union[ + "types.TopMetricsValue", + Sequence["types.TopMetricsValue"], + Sequence[Dict[str, Any]], + "DefaultType", + ] = DEFAULT, + size: Union[int, "DefaultType"] = DEFAULT, + sort: Union[ + Union[Union[str, "InstrumentedField"], "types.SortOptions"], + Sequence[Union[Union[str, "InstrumentedField"], "types.SortOptions"]], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + metrics=metrics, + size=size, + sort=sort, + field=field, + missing=missing, + script=script, + **kwargs, + ) + + +class ValueCount(Agg[_R]): + """ + A single-value metrics aggregation that counts the number of values + that are extracted from the aggregated documents. + + :arg format: + :arg field: The field on which to run the aggregation. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + :arg script: + """ + + name = "value_count" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, field=field, missing=missing, script=script, **kwargs + ) + + +class WeightedAvg(Agg[_R]): + """ + A single-value metrics aggregation that computes the weighted average + of numeric values that are extracted from the aggregated documents. + + :arg format: A numeric response formatter. + :arg value: Configuration for the field that provides the values. + :arg value_type: + :arg weight: Configuration for the field or script that provides the + weights. + """ + + name = "weighted_avg" + + def __init__( + self, + *, + format: Union[str, "DefaultType"] = DEFAULT, + value: Union[ + "types.WeightedAverageValue", Dict[str, Any], "DefaultType" + ] = DEFAULT, + value_type: Union[ + Literal[ + "string", + "long", + "double", + "number", + "date", + "date_nanos", + "ip", + "numeric", + "geo_point", + "boolean", + ], + "DefaultType", + ] = DEFAULT, + weight: Union[ + "types.WeightedAverageValue", Dict[str, Any], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + format=format, value=value, value_type=value_type, weight=weight, **kwargs + ) + + +class VariableWidthHistogram(Bucket[_R]): + """ + A multi-bucket aggregation similar to the histogram, except instead of + providing an interval to use as the width of each bucket, a target + number of buckets is provided. + + :arg field: The name of the field. + :arg buckets: The target number of buckets. Defaults to `10` if + omitted. + :arg shard_size: The number of buckets that the coordinating node will + request from each shard. Defaults to `buckets * 50`. + :arg initial_buffer: Specifies the number of individual documents that + will be stored in memory on a shard before the initial bucketing + algorithm is run. Defaults to `min(10 * shard_size, 50000)`. 
+ :arg script: + """ + + name = "variable_width_histogram" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + buckets: Union[int, "DefaultType"] = DEFAULT, + shard_size: Union[int, "DefaultType"] = DEFAULT, + initial_buffer: Union[int, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + buckets=buckets, + shard_size=shard_size, + initial_buffer=initial_buffer, + script=script, + **kwargs, + ) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return FieldBucketData(self, search, data) diff --git a/elasticsearch/dsl/analysis.py b/elasticsearch/dsl/analysis.py new file mode 100644 index 000000000..a810064e0 --- /dev/null +++ b/elasticsearch/dsl/analysis.py @@ -0,0 +1,341 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, ClassVar, Dict, List, Optional, Union, cast + +from . import async_connections, connections +from .utils import AsyncUsingType, AttrDict, DslBase, UsingType, merge + +__all__ = ["tokenizer", "analyzer", "char_filter", "token_filter", "normalizer"] + + +class AnalysisBase: + @classmethod + def _type_shortcut( + cls, + name_or_instance: Union[str, "AnalysisBase"], + type: Optional[str] = None, + **kwargs: Any, + ) -> DslBase: + if isinstance(name_or_instance, cls): + if type or kwargs: + raise ValueError(f"{cls.__name__}() cannot accept parameters.") + return name_or_instance # type: ignore[return-value] + + if not (type or kwargs): + return cls.get_dsl_class("builtin")(name_or_instance) # type: ignore + + return cls.get_dsl_class(type, "custom")( # type: ignore + name_or_instance, type or "custom", **kwargs + ) + + +class CustomAnalysis: + name = "custom" + + def __init__(self, filter_name: str, builtin_type: str = "custom", **kwargs: Any): + self._builtin_type = builtin_type + self._name = filter_name + super().__init__(**kwargs) + + def to_dict(self) -> Dict[str, Any]: + # only name to present in lists + return self._name # type: ignore + + def get_definition(self) -> Dict[str, Any]: + d = super().to_dict() # type: ignore + d = d.pop(self.name) + d["type"] = self._builtin_type + return d # type: ignore + + +class CustomAnalysisDefinition(CustomAnalysis): + _type_name: str + _param_defs: ClassVar[Dict[str, Any]] + filter: List[Any] + char_filter: List[Any] + + def get_analysis_definition(self) -> Dict[str, Any]: + out = {self._type_name: {self._name: self.get_definition()}} + + t = cast("Tokenizer", getattr(self, "tokenizer", None)) + if "tokenizer" in self._param_defs and hasattr(t, "get_definition"): + out["tokenizer"] = {t._name: t.get_definition()} + + filters = { + f._name: f.get_definition() + for f in self.filter + if hasattr(f, 
"get_definition") + } + if filters: + out["filter"] = filters + + # any sub filter definitions like multiplexers etc? + for f in self.filter: + if hasattr(f, "get_analysis_definition"): + d = f.get_analysis_definition() + if d: + merge(out, d, True) + + char_filters = { + f._name: f.get_definition() + for f in self.char_filter + if hasattr(f, "get_definition") + } + if char_filters: + out["char_filter"] = char_filters + + return out + + +class BuiltinAnalysis: + name = "builtin" + + def __init__(self, name: str): + self._name = name + super().__init__() + + def to_dict(self) -> Dict[str, Any]: + # only name to present in lists + return self._name # type: ignore + + +class Analyzer(AnalysisBase, DslBase): + _type_name = "analyzer" + name = "" + + +class BuiltinAnalyzer(BuiltinAnalysis, Analyzer): + def get_analysis_definition(self) -> Dict[str, Any]: + return {} + + +class CustomAnalyzer(CustomAnalysisDefinition, Analyzer): + _param_defs = { + "filter": {"type": "token_filter", "multi": True}, + "char_filter": {"type": "char_filter", "multi": True}, + "tokenizer": {"type": "tokenizer"}, + } + + def _get_body( + self, text: str, explain: bool, attributes: Optional[Dict[str, Any]] + ) -> Dict[str, Any]: + body = {"text": text, "explain": explain} + if attributes: + body["attributes"] = attributes + + definition = self.get_analysis_definition() + analyzer_def = self.get_definition() + + for section in ("tokenizer", "char_filter", "filter"): + if section not in analyzer_def: + continue + sec_def = definition.get(section, {}) + sec_names = analyzer_def[section] + + if isinstance(sec_names, str): + body[section] = sec_def.get(sec_names, sec_names) + else: + body[section] = [ + sec_def.get(sec_name, sec_name) for sec_name in sec_names + ] + + if self._builtin_type != "custom": + body["analyzer"] = self._builtin_type + + return body + + def simulate( + self, + text: str, + using: UsingType = "default", + explain: bool = False, + attributes: Optional[Dict[str, Any]] = None, + ) -> AttrDict[Any]: + """ + Use the Analyze API of elasticsearch to test the outcome of this analyzer. + + :arg text: Text to be analyzed + :arg using: connection alias to use, defaults to ``'default'`` + :arg explain: will output all token attributes for each token. You can + filter token attributes you want to output by setting ``attributes`` + option. + :arg attributes: if ``explain`` is specified, filter the token + attributes to return. + """ + es = connections.get_connection(using) + return AttrDict( + cast( + Dict[str, Any], + es.indices.analyze(body=self._get_body(text, explain, attributes)), + ) + ) + + async def async_simulate( + self, + text: str, + using: AsyncUsingType = "default", + explain: bool = False, + attributes: Optional[Dict[str, Any]] = None, + ) -> AttrDict[Any]: + """ + Use the Analyze API of elasticsearch to test the outcome of this analyzer. + + :arg text: Text to be analyzed + :arg using: connection alias to use, defaults to ``'default'`` + :arg explain: will output all token attributes for each token. You can + filter token attributes you want to output by setting ``attributes`` + option. + :arg attributes: if ``explain`` is specified, filter the token + attributes to return. 
+ """ + es = async_connections.get_connection(using) + return AttrDict( + cast( + Dict[str, Any], + await es.indices.analyze( + body=self._get_body(text, explain, attributes) + ), + ) + ) + + +class Normalizer(AnalysisBase, DslBase): + _type_name = "normalizer" + name = "" + + +class BuiltinNormalizer(BuiltinAnalysis, Normalizer): + def get_analysis_definition(self) -> Dict[str, Any]: + return {} + + +class CustomNormalizer(CustomAnalysisDefinition, Normalizer): + _param_defs = { + "filter": {"type": "token_filter", "multi": True}, + "char_filter": {"type": "char_filter", "multi": True}, + } + + +class Tokenizer(AnalysisBase, DslBase): + _type_name = "tokenizer" + name = "" + + +class BuiltinTokenizer(BuiltinAnalysis, Tokenizer): + pass + + +class CustomTokenizer(CustomAnalysis, Tokenizer): + pass + + +class TokenFilter(AnalysisBase, DslBase): + _type_name = "token_filter" + name = "" + + +class BuiltinTokenFilter(BuiltinAnalysis, TokenFilter): + pass + + +class CustomTokenFilter(CustomAnalysis, TokenFilter): + pass + + +class MultiplexerTokenFilter(CustomTokenFilter): + name = "multiplexer" + + def get_definition(self) -> Dict[str, Any]: + d = super(CustomTokenFilter, self).get_definition() + + if "filters" in d: + d["filters"] = [ + # comma delimited string given by user + ( + fs + if isinstance(fs, str) + else + # list of strings or TokenFilter objects + ", ".join(f.to_dict() if hasattr(f, "to_dict") else f for f in fs) + ) + for fs in self.filters + ] + return d + + def get_analysis_definition(self) -> Dict[str, Any]: + if not hasattr(self, "filters"): + return {} + + fs: Dict[str, Any] = {} + d = {"filter": fs} + for filters in self.filters: + if isinstance(filters, str): + continue + fs.update( + { + f._name: f.get_definition() + for f in filters + if hasattr(f, "get_definition") + } + ) + return d + + +class ConditionalTokenFilter(CustomTokenFilter): + name = "condition" + + def get_definition(self) -> Dict[str, Any]: + d = super(CustomTokenFilter, self).get_definition() + if "filter" in d: + d["filter"] = [ + f.to_dict() if hasattr(f, "to_dict") else f for f in self.filter + ] + return d + + def get_analysis_definition(self) -> Dict[str, Any]: + if not hasattr(self, "filter"): + return {} + + return { + "filter": { + f._name: f.get_definition() + for f in self.filter + if hasattr(f, "get_definition") + } + } + + +class CharFilter(AnalysisBase, DslBase): + _type_name = "char_filter" + name = "" + + +class BuiltinCharFilter(BuiltinAnalysis, CharFilter): + pass + + +class CustomCharFilter(CustomAnalysis, CharFilter): + pass + + +# shortcuts for direct use +analyzer = Analyzer._type_shortcut +tokenizer = Tokenizer._type_shortcut +token_filter = TokenFilter._type_shortcut +char_filter = CharFilter._type_shortcut +normalizer = Normalizer._type_shortcut diff --git a/elasticsearch/dsl/async_connections.py b/elasticsearch/dsl/async_connections.py new file mode 100644 index 000000000..8a23d3828 --- /dev/null +++ b/elasticsearch/dsl/async_connections.py @@ -0,0 +1,37 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Type + +from elasticsearch import AsyncElasticsearch + +from .connections import Connections + + +class AsyncElasticsearchConnections(Connections[AsyncElasticsearch]): + def __init__( + self, *, elasticsearch_class: Type[AsyncElasticsearch] = AsyncElasticsearch + ): + super().__init__(elasticsearch_class=elasticsearch_class) + + +connections = AsyncElasticsearchConnections(elasticsearch_class=AsyncElasticsearch) +configure = connections.configure +add_connection = connections.add_connection +remove_connection = connections.remove_connection +create_connection = connections.create_connection +get_connection = connections.get_connection diff --git a/elasticsearch/dsl/connections.py b/elasticsearch/dsl/connections.py new file mode 100644 index 000000000..a3d340967 --- /dev/null +++ b/elasticsearch/dsl/connections.py @@ -0,0 +1,144 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Dict, Generic, Type, TypeVar, Union + +from elasticsearch import Elasticsearch + +from .serializer import serializer + +_T = TypeVar("_T") + + +class Connections(Generic[_T]): + """ + Class responsible for holding connections to different clusters. Used as a + singleton in this module. + """ + + def __init__(self, *, elasticsearch_class: Type[_T]): + self._kwargs: Dict[str, Any] = {} + self._conns: Dict[str, _T] = {} + self.elasticsearch_class: Type[_T] = elasticsearch_class + + def configure(self, **kwargs: Any) -> None: + """ + Configure multiple connections at once, useful for passing in config + dictionaries obtained from other sources, like Django's settings or a + configuration management tool. + + Example:: + + connections.configure( + default={'hosts': 'localhost'}, + dev={'hosts': ['esdev1.example.com:9200'], 'sniff_on_start': True}, + ) + + Connections will only be constructed lazily when requested through + ``get_connection``. + """ + for k in list(self._conns): + # try and preserve existing client to keep the persistent connections alive + if k in self._kwargs and kwargs.get(k, None) == self._kwargs[k]: + continue + del self._conns[k] + self._kwargs = kwargs + + def add_connection(self, alias: str, conn: _T) -> None: + """ + Add a connection object, it will be passed through as-is. 
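+
+        For example (illustrative only)::
+
+            connections.add_connection("local", Elasticsearch("http://localhost:9200"))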
+ """ + self._conns[alias] = self._with_user_agent(conn) + + def remove_connection(self, alias: str) -> None: + """ + Remove connection from the registry. Raises ``KeyError`` if connection + wasn't found. + """ + errors = 0 + for d in (self._conns, self._kwargs): + try: + del d[alias] + except KeyError: + errors += 1 + + if errors == 2: + raise KeyError(f"There is no connection with alias {alias!r}.") + + def create_connection(self, alias: str = "default", **kwargs: Any) -> _T: + """ + Construct an instance of ``elasticsearch.Elasticsearch`` and register + it under given alias. + """ + kwargs.setdefault("serializer", serializer) + conn = self._conns[alias] = self.elasticsearch_class(**kwargs) + return self._with_user_agent(conn) + + def get_connection(self, alias: Union[str, _T] = "default") -> _T: + """ + Retrieve a connection, construct it if necessary (only configuration + was passed to us). If a non-string alias has been passed through we + assume it's already a client instance and will just return it as-is. + + Raises ``KeyError`` if no client (or its definition) is registered + under the alias. + """ + # do not check isinstance(Elasticsearch) so that people can wrap their + # clients + if not isinstance(alias, str): + return self._with_user_agent(alias) + + # connection already established + try: + return self._conns[alias] + except KeyError: + pass + + # if not, try to create it + try: + return self.create_connection(alias, **self._kwargs[alias]) + except KeyError: + # no connection and no kwargs to set one up + raise KeyError(f"There is no connection with alias {alias!r}.") + + def _with_user_agent(self, conn: _T) -> _T: + from . import __versionstr__ # this is here to avoid circular imports + + # try to inject our user agent + if hasattr(conn, "_headers"): + is_frozen = conn._headers.frozen + if is_frozen: + conn._headers = conn._headers.copy() + conn._headers.update( + {"user-agent": f"elasticsearch-dsl-py/{__versionstr__}"} + ) + if is_frozen: + conn._headers.freeze() + return conn + + +class ElasticsearchConnections(Connections[Elasticsearch]): + def __init__(self, *, elasticsearch_class: Type[Elasticsearch] = Elasticsearch): + super().__init__(elasticsearch_class=elasticsearch_class) + + +connections = ElasticsearchConnections() +configure = connections.configure +add_connection = connections.add_connection +remove_connection = connections.remove_connection +create_connection = connections.create_connection +get_connection = connections.get_connection diff --git a/elasticsearch/dsl/document.py b/elasticsearch/dsl/document.py new file mode 100644 index 000000000..c27c5af04 --- /dev/null +++ b/elasticsearch/dsl/document.py @@ -0,0 +1,20 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
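+
+# Convenience module: re-exports the async and sync document classes and the
+# shared helpers under a single import path.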
+ +from ._async.document import AsyncDocument # noqa: F401 +from ._sync.document import Document # noqa: F401 +from .document_base import InnerDoc, MetaField # noqa: F401 diff --git a/elasticsearch/dsl/document_base.py b/elasticsearch/dsl/document_base.py new file mode 100644 index 000000000..a7026778a --- /dev/null +++ b/elasticsearch/dsl/document_base.py @@ -0,0 +1,444 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import date, datetime +from fnmatch import fnmatch +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Dict, + Generic, + List, + Optional, + Tuple, + TypeVar, + Union, + get_args, + overload, +) + +try: + from types import UnionType # type: ignore[attr-defined] +except ImportError: + UnionType = None + +from typing_extensions import dataclass_transform + +from .exceptions import ValidationException +from .field import Binary, Boolean, Date, Field, Float, Integer, Nested, Object, Text +from .mapping import Mapping +from .utils import DOC_META_FIELDS, ObjectBase + +if TYPE_CHECKING: + from elastic_transport import ObjectApiResponse + + from .index_base import IndexBase + + +class MetaField: + def __init__(self, *args: Any, **kwargs: Any): + self.args, self.kwargs = args, kwargs + + +class InstrumentedField: + """Proxy object for a mapped document field. + + An object of this instance is returned when a field is accessed as a class + attribute of a ``Document`` or ``InnerDoc`` subclass. 
These objects can + be used in any situation in which a reference to a field is required, such + as when specifying sort options in a search:: + + class MyDocument(Document): + name: str + + s = MyDocument.search() + s = s.sort(-MyDocument.name) # sort by name in descending order + """ + + def __init__(self, name: str, field: Field): + self._name = name + self._field = field + + # note that the return value type here assumes classes will only be used to + # access fields (I haven't found a way to make this type dynamic based on a + # decision taken at runtime) + def __getattr__(self, attr: str) -> "InstrumentedField": + try: + # first let's see if this is an attribute of this object + return super().__getattribute__(attr) # type: ignore + except AttributeError: + try: + # next we see if we have a sub-field with this name + return InstrumentedField(f"{self._name}.{attr}", self._field[attr]) + except KeyError: + # lastly we let the wrapped field resolve this attribute + return getattr(self._field, attr) # type: ignore + + def __pos__(self) -> str: + """Return the field name representation for ascending sort order""" + return f"{self._name}" + + def __neg__(self) -> str: + """Return the field name representation for descending sort order""" + return f"-{self._name}" + + def __str__(self) -> str: + return self._name + + def __repr__(self) -> str: + return f"InstrumentedField[{self._name}]" + + +class DocumentMeta(type): + _doc_type: "DocumentOptions" + _index: "IndexBase" + + def __new__( + cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any] + ) -> "DocumentMeta": + # DocumentMeta filters attrs in place + attrs["_doc_type"] = DocumentOptions(name, bases, attrs) + return super().__new__(cls, name, bases, attrs) + + def __getattr__(cls, attr: str) -> Any: + if attr in cls._doc_type.mapping: + return InstrumentedField(attr, cls._doc_type.mapping[attr]) + return super().__getattribute__(attr) + + +class DocumentOptions: + type_annotation_map = { + int: (Integer, {}), + float: (Float, {}), + bool: (Boolean, {}), + str: (Text, {}), + bytes: (Binary, {}), + datetime: (Date, {}), + date: (Date, {"format": "yyyy-MM-dd"}), + } + + def __init__(self, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]): + meta = attrs.pop("Meta", None) + + # create the mapping instance + self.mapping: Mapping = getattr(meta, "mapping", Mapping()) + + # register the document's fields, which can be given in a few formats: + # + # class MyDocument(Document): + # # required field using native typing + # # (str, int, float, bool, datetime, date) + # field1: str + # + # # optional field using native typing + # field2: Optional[datetime] + # + # # array field using native typing + # field3: list[int] + # + # # sub-object, same as Object(MyInnerDoc) + # field4: MyInnerDoc + # + # # nested sub-objects, same as Nested(MyInnerDoc) + # field5: list[MyInnerDoc] + # + # # use typing, but override with any stock or custom field + # field6: bool = MyCustomField() + # + # # best mypy and pyright support and dataclass-like behavior + # field7: M[date] + # field8: M[str] = mapped_field(MyCustomText(), default="foo") + # + # # legacy format without Python typing + # field9 = Text() + # + # # ignore attributes + # field10: ClassVar[string] = "a regular class variable" + annotations = attrs.get("__annotations__", {}) + fields = set([n for n in attrs if isinstance(attrs[n], Field)]) + fields.update(annotations.keys()) + field_defaults = {} + for name in fields: + value: Any = None + required = None + multi = None + if name in 
annotations: + # the field has a type annotation, so next we try to figure out + # what field type we can use + type_ = annotations[name] + skip = False + required = True + multi = False + while hasattr(type_, "__origin__"): + if type_.__origin__ == ClassVar: + skip = True + break + elif type_.__origin__ == Mapped: + # M[type] -> extract the wrapped type + type_ = type_.__args__[0] + elif type_.__origin__ == Union: + if len(type_.__args__) == 2 and type_.__args__[1] is type(None): + # Optional[type] -> mark instance as optional + required = False + type_ = type_.__args__[0] + else: + raise TypeError("Unsupported union") + elif type_.__origin__ in [list, List]: + # List[type] -> mark instance as multi + multi = True + required = False + type_ = type_.__args__[0] + else: + break + if skip or type_ == ClassVar: + # skip ClassVar attributes + continue + if type(type_) is UnionType: + # a union given with the pipe syntax + args = get_args(type_) + if len(args) == 2 and args[1] is type(None): + required = False + type_ = type_.__args__[0] + else: + raise TypeError("Unsupported union") + field = None + field_args: List[Any] = [] + field_kwargs: Dict[str, Any] = {} + if isinstance(type_, type) and issubclass(type_, InnerDoc): + # object or nested field + field = Nested if multi else Object + field_args = [type_] + elif type_ in self.type_annotation_map: + # use best field type for the type hint provided + field, field_kwargs = self.type_annotation_map[type_] # type: ignore + + if field: + field_kwargs = { + "multi": multi, + "required": required, + **field_kwargs, + } + value = field(*field_args, **field_kwargs) + + if name in attrs: + # this field has a right-side value, which can be field + # instance on its own or wrapped with mapped_field() + attr_value = attrs[name] + if isinstance(attr_value, dict): + # the mapped_field() wrapper function was used so we need + # to look for the field instance and also record any + # dataclass-style defaults + attr_value = attrs[name].get("_field") + default_value = attrs[name].get("default") or attrs[name].get( + "default_factory" + ) + if default_value: + field_defaults[name] = default_value + if attr_value: + value = attr_value + if required is not None: + value._required = required + if multi is not None: + value._multi = multi + + if value is None: + raise TypeError(f"Cannot map field {name}") + + self.mapping.field(name, value) + if name in attrs: + del attrs[name] + + # store dataclass-style defaults for ObjectBase.__init__ to assign + attrs["_defaults"] = field_defaults + + # add all the mappings for meta fields + for name in dir(meta): + if isinstance(getattr(meta, name, None), MetaField): + params = getattr(meta, name) + self.mapping.meta(name, *params.args, **params.kwargs) + + # document inheritance - include the fields from parents' mappings + for b in bases: + if hasattr(b, "_doc_type") and hasattr(b._doc_type, "mapping"): + self.mapping.update(b._doc_type.mapping, update_only=True) + + @property + def name(self) -> str: + return self.mapping.properties.name + + +_FieldType = TypeVar("_FieldType") + + +class Mapped(Generic[_FieldType]): + """Class that represents the type of a mapped field. + + This class can be used as an optional wrapper on a field type to help type + checkers assign the correct type when the field is used as a class + attribute. 
+ + Consider the following definitions:: + + class MyDocument(Document): + first: str + second: M[str] + + mydoc = MyDocument(first="1", second="2") + + Type checkers have no trouble inferring the type of both ``mydoc.first`` + and ``mydoc.second`` as ``str``, but while ``MyDocument.first`` will be + incorrectly typed as ``str``, ``MyDocument.second`` should be assigned the + correct ``InstrumentedField`` type. + """ + + __slots__: Dict[str, Any] = {} + + if TYPE_CHECKING: + + @overload + def __get__(self, instance: None, owner: Any) -> InstrumentedField: ... + + @overload + def __get__(self, instance: object, owner: Any) -> _FieldType: ... + + def __get__( + self, instance: Optional[object], owner: Any + ) -> Union[InstrumentedField, _FieldType]: ... + + def __set__(self, instance: Optional[object], value: _FieldType) -> None: ... + + def __delete__(self, instance: Any) -> None: ... + + +M = Mapped + + +def mapped_field( + field: Optional[Field] = None, + *, + init: bool = True, + default: Any = None, + default_factory: Optional[Callable[[], Any]] = None, + **kwargs: Any, +) -> Any: + """Construct a field using dataclass behaviors + + This function can be used in the right side of a document field definition + as a wrapper for the field instance or as a way to provide dataclass-compatible + options. + + :param field: The instance of ``Field`` to use for this field. If not provided, + an instance that is appropriate for the type given to the field is used. + :param init: a value of ``True`` adds this field to the constructor, and a + value of ``False`` omits it from it. The default is ``True``. + :param default: a default value to use for this field when one is not provided + explicitly. + :param default_factory: a callable that returns a default value for the field, + when one isn't provided explicitly. Only one of ``factory`` and + ``default_factory`` can be used. + """ + return { + "_field": field, + "init": init, + "default": default, + "default_factory": default_factory, + **kwargs, + } + + +@dataclass_transform(field_specifiers=(mapped_field,)) +class InnerDoc(ObjectBase, metaclass=DocumentMeta): + """ + Common class for inner documents like Object or Nested + """ + + @classmethod + def from_es( + cls, + data: Union[Dict[str, Any], "ObjectApiResponse[Any]"], + data_only: bool = False, + ) -> "InnerDoc": + if data_only: + data = {"_source": data} + return super().from_es(data) + + +class DocumentBase(ObjectBase): + """ + Model-like class for persisting documents in elasticsearch. 
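+
+    Concrete subclasses (``Document`` and ``AsyncDocument``) add the
+    persistence methods on top of this base. A minimal sketch of such a
+    subclass (illustrative only)::
+
+        class Post(Document):
+            title: str
+            created: M[datetime] = mapped_field(default_factory=datetime.now)
+
+            class Index:
+                name = "posts"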
+ """ + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + if cls._index._name is None: + return True + return fnmatch(hit.get("_index", ""), cls._index._name) + + @classmethod + def _default_index(cls, index: Optional[str] = None) -> str: + return index or cls._index._name + + def _get_index( + self, index: Optional[str] = None, required: bool = True + ) -> Optional[str]: + if index is None: + index = getattr(self.meta, "index", None) + if index is None: + index = getattr(self._index, "_name", None) + if index is None and required: + raise ValidationException("No index") + if index and "*" in index: + raise ValidationException("You cannot write to a wildcard index.") + return index + + def __repr__(self) -> str: + return "{}({})".format( + self.__class__.__name__, + ", ".join( + f"{key}={getattr(self.meta, key)!r}" + for key in ("index", "id") + if key in self.meta + ), + ) + + def to_dict(self, include_meta: bool = False, skip_empty: bool = True) -> Dict[str, Any]: # type: ignore[override] + """ + Serialize the instance into a dictionary so that it can be saved in elasticsearch. + + :arg include_meta: if set to ``True`` will include all the metadata + (``_index``, ``_id`` etc). Otherwise just the document's + data is serialized. This is useful when passing multiple instances into + ``elasticsearch.helpers.bulk``. + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in elasticsearch. + """ + d = super().to_dict(skip_empty=skip_empty) + if not include_meta: + return d + + meta = {"_" + k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # in case of to_dict include the index unlike save/update/delete + index = self._get_index(required=False) + if index is not None: + meta["_index"] = index + + meta["_source"] = d + return meta diff --git a/elasticsearch/dsl/exceptions.py b/elasticsearch/dsl/exceptions.py new file mode 100644 index 000000000..8aae0ffa8 --- /dev/null +++ b/elasticsearch/dsl/exceptions.py @@ -0,0 +1,32 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +class ElasticsearchDslException(Exception): + pass + + +class UnknownDslObject(ElasticsearchDslException): + pass + + +class ValidationException(ValueError, ElasticsearchDslException): + pass + + +class IllegalOperation(ElasticsearchDslException): + pass diff --git a/elasticsearch/dsl/faceted_search.py b/elasticsearch/dsl/faceted_search.py new file mode 100644 index 000000000..96941b08c --- /dev/null +++ b/elasticsearch/dsl/faceted_search.py @@ -0,0 +1,28 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ._async.faceted_search import AsyncFacetedSearch # noqa: F401 +from ._sync.faceted_search import FacetedSearch # noqa: F401 +from .faceted_search_base import ( # noqa: F401 + DateHistogramFacet, + Facet, + FacetedResponse, + HistogramFacet, + NestedFacet, + RangeFacet, + TermsFacet, +) diff --git a/elasticsearch/dsl/faceted_search_base.py b/elasticsearch/dsl/faceted_search_base.py new file mode 100644 index 000000000..ee6fed2f9 --- /dev/null +++ b/elasticsearch/dsl/faceted_search_base.py @@ -0,0 +1,489 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime, timedelta +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Generic, + List, + Optional, + Sequence, + Tuple, + Type, + Union, + cast, +) + +from typing_extensions import Self + +from .aggs import A, Agg +from .query import MatchAll, Nested, Query, Range, Terms +from .response import Response +from .utils import _R, AttrDict + +if TYPE_CHECKING: + from .document_base import DocumentBase + from .response.aggs import BucketData + from .search_base import SearchBase + +FilterValueType = Union[str, datetime, Sequence[str]] + +__all__ = [ + "FacetedSearchBase", + "HistogramFacet", + "TermsFacet", + "DateHistogramFacet", + "RangeFacet", + "NestedFacet", +] + + +class Facet(Generic[_R]): + """ + A facet on faceted search. Wraps and aggregation and provides functionality + to create a filter for selected values and return a list of facet values + from the result of the aggregation. + """ + + agg_type: str = "" + + def __init__( + self, metric: Optional[Agg[_R]] = None, metric_sort: str = "desc", **kwargs: Any + ): + self.filter_values = () + self._params = kwargs + self._metric = metric + if metric and metric_sort: + self._params["order"] = {"metric": metric_sort} + + def get_aggregation(self) -> Agg[_R]: + """ + Return the aggregation object. + """ + agg: Agg[_R] = A(self.agg_type, **self._params) + if self._metric: + agg.metric("metric", self._metric) + return agg + + def add_filter(self, filter_values: List[FilterValueType]) -> Optional[Query]: + """ + Construct a filter. 
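+
+        Filters for the individual selected values are combined with a
+        logical OR.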
+ """ + if not filter_values: + return None + + f = self.get_value_filter(filter_values[0]) + for v in filter_values[1:]: + f |= self.get_value_filter(v) + return f + + def get_value_filter(self, filter_value: FilterValueType) -> Query: # type: ignore + """ + Construct a filter for an individual value + """ + pass + + def is_filtered(self, key: str, filter_values: List[FilterValueType]) -> bool: + """ + Is a filter active on the given key. + """ + return key in filter_values + + def get_value(self, bucket: "BucketData[_R]") -> Any: + """ + return a value representing a bucket. Its key as default. + """ + return bucket["key"] + + def get_metric(self, bucket: "BucketData[_R]") -> int: + """ + Return a metric, by default doc_count for a bucket. + """ + if self._metric: + return cast(int, bucket["metric"]["value"]) + return cast(int, bucket["doc_count"]) + + def get_values( + self, data: "BucketData[_R]", filter_values: List[FilterValueType] + ) -> List[Tuple[Any, int, bool]]: + """ + Turn the raw bucket data into a list of tuples containing the key, + number of documents and a flag indicating whether this value has been + selected or not. + """ + out = [] + for bucket in data.buckets: + b = cast("BucketData[_R]", bucket) + key = self.get_value(b) + out.append((key, self.get_metric(b), self.is_filtered(key, filter_values))) + return out + + +class TermsFacet(Facet[_R]): + agg_type = "terms" + + def add_filter(self, filter_values: List[FilterValueType]) -> Optional[Query]: + """Create a terms filter instead of bool containing term filters.""" + if filter_values: + return Terms(self._params["field"], filter_values, _expand__to_dot=False) + return None + + +class RangeFacet(Facet[_R]): + agg_type = "range" + + def _range_to_dict( + self, range: Tuple[Any, Tuple[Optional[int], Optional[int]]] + ) -> Dict[str, Any]: + key, _range = range + out: Dict[str, Any] = {"key": key} + if _range[0] is not None: + out["from"] = _range[0] + if _range[1] is not None: + out["to"] = _range[1] + return out + + def __init__( + self, + ranges: Sequence[Tuple[Any, Tuple[Optional[int], Optional[int]]]], + **kwargs: Any, + ): + super().__init__(**kwargs) + self._params["ranges"] = list(map(self._range_to_dict, ranges)) + self._params["keyed"] = False + self._ranges = dict(ranges) + + def get_value_filter(self, filter_value: FilterValueType) -> Query: + f, t = self._ranges[filter_value] + limits: Dict[str, Any] = {} + if f is not None: + limits["gte"] = f + if t is not None: + limits["lt"] = t + + return Range(self._params["field"], limits, _expand__to_dot=False) + + +class HistogramFacet(Facet[_R]): + agg_type = "histogram" + + def get_value_filter(self, filter_value: FilterValueType) -> Range: + return Range( + self._params["field"], + { + "gte": filter_value, + "lt": filter_value + self._params["interval"], + }, + _expand__to_dot=False, + ) + + +def _date_interval_year(d: datetime) -> datetime: + return d.replace( + year=d.year + 1, day=(28 if d.month == 2 and d.day == 29 else d.day) + ) + + +def _date_interval_month(d: datetime) -> datetime: + return (d + timedelta(days=32)).replace(day=1) + + +def _date_interval_week(d: datetime) -> datetime: + return d + timedelta(days=7) + + +def _date_interval_day(d: datetime) -> datetime: + return d + timedelta(days=1) + + +def _date_interval_hour(d: datetime) -> datetime: + return d + timedelta(hours=1) + + +class DateHistogramFacet(Facet[_R]): + agg_type = "date_histogram" + + DATE_INTERVALS = { + "year": _date_interval_year, + "1Y": _date_interval_year, + "month": 
_date_interval_month, + "1M": _date_interval_month, + "week": _date_interval_week, + "1w": _date_interval_week, + "day": _date_interval_day, + "1d": _date_interval_day, + "hour": _date_interval_hour, + "1h": _date_interval_hour, + } + + def __init__(self, **kwargs: Any): + kwargs.setdefault("min_doc_count", 0) + super().__init__(**kwargs) + + def get_value(self, bucket: "BucketData[_R]") -> Any: + if not isinstance(bucket["key"], datetime): + # Elasticsearch returns key=None instead of 0 for date 1970-01-01, + # so we need to set key to 0 to avoid TypeError exception + if bucket["key"] is None: + bucket["key"] = 0 + # Preserve milliseconds in the datetime + return datetime.utcfromtimestamp(int(cast(int, bucket["key"])) / 1000.0) + else: + return bucket["key"] + + def get_value_filter(self, filter_value: Any) -> Range: + for interval_type in ("calendar_interval", "fixed_interval"): + if interval_type in self._params: + break + else: + interval_type = "interval" + + return Range( + self._params["field"], + { + "gte": filter_value, + "lt": self.DATE_INTERVALS[self._params[interval_type]](filter_value), + }, + _expand__to_dot=False, + ) + + +class NestedFacet(Facet[_R]): + agg_type = "nested" + + def __init__(self, path: str, nested_facet: Facet[_R]): + self._path = path + self._inner = nested_facet + super().__init__(path=path, aggs={"inner": nested_facet.get_aggregation()}) + + def get_values( + self, data: "BucketData[_R]", filter_values: List[FilterValueType] + ) -> List[Tuple[Any, int, bool]]: + return self._inner.get_values(data.inner, filter_values) + + def add_filter(self, filter_values: List[FilterValueType]) -> Optional[Query]: + inner_q = self._inner.add_filter(filter_values) + if inner_q: + return Nested(path=self._path, query=inner_q) + return None + + +class FacetedResponse(Response[_R]): + if TYPE_CHECKING: + _faceted_search: "FacetedSearchBase[_R]" + _facets: Dict[str, List[Tuple[Any, int, bool]]] + + @property + def query_string(self) -> Optional[Union[str, Query]]: + return self._faceted_search._query + + @property + def facets(self) -> Dict[str, List[Tuple[Any, int, bool]]]: + if not hasattr(self, "_facets"): + super(AttrDict, self).__setattr__("_facets", AttrDict({})) + for name, facet in self._faceted_search.facets.items(): + self._facets[name] = facet.get_values( + getattr(getattr(self.aggregations, "_filter_" + name), name), + self._faceted_search.filter_values.get(name, []), + ) + return self._facets + + +class FacetedSearchBase(Generic[_R]): + """ + Abstraction for creating faceted navigation searches that takes care of + composing the queries, aggregations and filters as needed as well as + presenting the results in an easy-to-consume fashion:: + + class BlogSearch(FacetedSearch): + index = 'blogs' + doc_types = [Blog, Post] + fields = ['title^5', 'category', 'description', 'body'] + + facets = { + 'type': TermsFacet(field='_type'), + 'category': TermsFacet(field='category'), + 'weekly_posts': DateHistogramFacet(field='published_from', interval='week') + } + + def search(self): + ' Override search to add your own filters ' + s = super(BlogSearch, self).search() + return s.filter('term', published=True) + + # when using: + blog_search = BlogSearch("web framework", filters={"category": "python"}) + + # supports pagination + blog_search[10:20] + + response = blog_search.execute() + + # easy access to aggregation results: + for category, hit_count, is_selected in response.facets.category: + print( + "Category %s has %d hits%s." 
% ( + category, + hit_count, + ' and is chosen' if is_selected else '' + ) + ) + + """ + + index: Optional[str] = None + doc_types: Optional[List[Union[str, Type["DocumentBase"]]]] = None + fields: Sequence[str] = [] + facets: Dict[str, Facet[_R]] = {} + using = "default" + + if TYPE_CHECKING: + + def search(self) -> "SearchBase[_R]": ... + + def __init__( + self, + query: Optional[Union[str, Query]] = None, + filters: Dict[str, FilterValueType] = {}, + sort: Sequence[str] = [], + ): + """ + :arg query: the text to search for + :arg filters: facet values to filter + :arg sort: sort information to be passed to :class:`~elasticsearch.dsl.Search` + """ + self._query = query + self._filters: Dict[str, Query] = {} + self._sort = sort + self.filter_values: Dict[str, List[FilterValueType]] = {} + for name, value in filters.items(): + self.add_filter(name, value) + + self._s = self.build_search() + + def __getitem__(self, k: Union[int, slice]) -> Self: + self._s = self._s[k] + return self + + def add_filter( + self, name: str, filter_values: Union[FilterValueType, List[FilterValueType]] + ) -> None: + """ + Add a filter for a facet. + """ + # normalize the value into a list + if not isinstance(filter_values, (tuple, list)): + if filter_values is None: + return + filter_values = [ + filter_values, + ] + + # remember the filter values for use in FacetedResponse + self.filter_values[name] = filter_values # type: ignore[assignment] + + # get the filter from the facet + f = self.facets[name].add_filter(filter_values) # type: ignore[arg-type] + if f is None: + return + + self._filters[name] = f + + def query( + self, search: "SearchBase[_R]", query: Union[str, Query] + ) -> "SearchBase[_R]": + """ + Add query part to ``search``. + + Override this if you wish to customize the query used. + """ + if query: + if self.fields: + return search.query("multi_match", fields=self.fields, query=query) + else: + return search.query("multi_match", query=query) + return search + + def aggregate(self, search: "SearchBase[_R]") -> None: + """ + Add aggregations representing the facets selected, including potential + filters. + """ + for f, facet in self.facets.items(): + agg = facet.get_aggregation() + agg_filter: Query = MatchAll() + for field, filter in self._filters.items(): + if f == field: + continue + agg_filter &= filter + search.aggs.bucket("_filter_" + f, "filter", filter=agg_filter).bucket( + f, agg + ) + + def filter(self, search: "SearchBase[_R]") -> "SearchBase[_R]": + """ + Add a ``post_filter`` to the search request narrowing the results based + on the facet filters. + """ + if not self._filters: + return search + + post_filter: Query = MatchAll() + for f in self._filters.values(): + post_filter &= f + return search.post_filter(post_filter) + + def highlight(self, search: "SearchBase[_R]") -> "SearchBase[_R]": + """ + Add highlighting for all the fields + """ + return search.highlight( + *(f if "^" not in f else f.split("^", 1)[0] for f in self.fields) + ) + + def sort(self, search: "SearchBase[_R]") -> "SearchBase[_R]": + """ + Add sorting information to the request. + """ + if self._sort: + search = search.sort(*self._sort) + return search + + def params(self, **kwargs: Any) -> None: + """ + Specify query params to be used when executing the search. All the + keyword arguments will override the current values. See + https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.Elasticsearch.search + for all available parameters. 
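+
+        For example, using the ``BlogSearch`` subclass sketched above
+        (illustrative only)::
+
+            blog_search = BlogSearch("web framework")
+            blog_search.params(routing="user-1")
+            response = blog_search.execute()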
+ """ + self._s = self._s.params(**kwargs) + + def build_search(self) -> "SearchBase[_R]": + """ + Construct the ``Search`` object. + """ + s = self.search() + if self._query is not None: + s = self.query(s, self._query) + s = self.filter(s) + if self.fields: + s = self.highlight(s) + s = self.sort(s) + self.aggregate(s) + return s diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py new file mode 100644 index 000000000..55ab4f7f9 --- /dev/null +++ b/elasticsearch/dsl/field.py @@ -0,0 +1,587 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import base64 +import collections.abc +import ipaddress +from copy import deepcopy +from datetime import date, datetime +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + Iterator, + Optional, + Tuple, + Type, + Union, + cast, +) + +from dateutil import parser, tz + +from .exceptions import ValidationException +from .query import Q +from .utils import AttrDict, AttrList, DslBase +from .wrappers import Range + +if TYPE_CHECKING: + from datetime import tzinfo + from ipaddress import IPv4Address, IPv6Address + + from _operator import _SupportsComparison + + from .document import InnerDoc + from .mapping_base import MappingBase + from .query import Query + +unicode = str + + +def construct_field( + name_or_field: Union[ + str, + "Field", + Dict[str, Any], + ], + **params: Any, +) -> "Field": + # {"type": "text", "analyzer": "snowball"} + if isinstance(name_or_field, collections.abc.Mapping): + if params: + raise ValueError( + "construct_field() cannot accept parameters when passing in a dict." + ) + params = deepcopy(name_or_field) + if "type" not in params: + # inner object can be implicitly defined + if "properties" in params: + name = "object" + else: + raise ValueError('construct_field() needs to have a "type" key.') + else: + name = params.pop("type") + return Field.get_dsl_class(name)(**params) + + # Text() + if isinstance(name_or_field, Field): + if params: + raise ValueError( + "construct_field() cannot accept parameters " + "when passing in a construct_field object." 
+ ) + return name_or_field + + # "text", analyzer="snowball" + return Field.get_dsl_class(name_or_field)(**params) + + +class Field(DslBase): + _type_name = "field" + _type_shortcut = staticmethod(construct_field) + # all fields can be multifields + _param_defs = {"fields": {"type": "field", "hash": True}} + name = "" + _coerce = False + + def __init__( + self, multi: bool = False, required: bool = False, *args: Any, **kwargs: Any + ): + """ + :arg bool multi: specifies whether field can contain array of values + :arg bool required: specifies whether field is required + """ + self._multi = multi + self._required = required + super().__init__(*args, **kwargs) + + def __getitem__(self, subfield: str) -> "Field": + return cast(Field, self._params.get("fields", {})[subfield]) + + def _serialize(self, data: Any) -> Any: + return data + + def _deserialize(self, data: Any) -> Any: + return data + + def _empty(self) -> Optional[Any]: + return None + + def empty(self) -> Optional[Any]: + if self._multi: + return AttrList([]) + return self._empty() + + def serialize(self, data: Any) -> Any: + if isinstance(data, (list, AttrList, tuple)): + return list(map(self._serialize, cast(Iterable[Any], data))) + return self._serialize(data) + + def deserialize(self, data: Any) -> Any: + if isinstance(data, (list, AttrList, tuple)): + data = [ + None if d is None else self._deserialize(d) + for d in cast(Iterable[Any], data) + ] + return data + if data is None: + return None + return self._deserialize(data) + + def clean(self, data: Any) -> Any: + if data is not None: + data = self.deserialize(data) + if data in (None, [], {}) and self._required: + raise ValidationException("Value required for this field.") + return data + + def to_dict(self) -> Dict[str, Any]: + d = super().to_dict() + name, value = cast(Tuple[str, Dict[str, Any]], d.popitem()) + value["type"] = name + return value + + +class CustomField(Field): + name = "custom" + _coerce = True + + def to_dict(self) -> Dict[str, Any]: + if isinstance(self.builtin_type, Field): + return self.builtin_type.to_dict() + + d = super().to_dict() + d["type"] = self.builtin_type + return d + + +class Object(Field): + name = "object" + _coerce = True + + def __init__( + self, + doc_class: Optional[Type["InnerDoc"]] = None, + dynamic: Optional[Union[bool, str]] = None, + properties: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ): + """ + :arg document.InnerDoc doc_class: base doc class that handles mapping. + If no `doc_class` is provided, new instance of `InnerDoc` will be created, + populated with `properties` and used. Can not be provided together with `properties` + :arg dynamic: whether new properties may be created dynamically. + Valid values are `True`, `False`, `'strict'`. + Can not be provided together with `doc_class`. + See https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic.html + for more details + :arg dict properties: used to construct underlying mapping if no `doc_class` is provided. + Can not be provided together with `doc_class` + """ + if doc_class and (properties or dynamic is not None): + raise ValidationException( + "doc_class and properties/dynamic should not be provided together" + ) + if doc_class: + self._doc_class: Type["InnerDoc"] = doc_class + else: + # FIXME import + from .document import InnerDoc + + # no InnerDoc subclass, creating one instead... 
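+            # the synthesized class is populated below with the supplied
+            # ``properties`` and, if given, the ``dynamic`` mapping setting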
+ self._doc_class = type("InnerDoc", (InnerDoc,), {}) + for name, field in (properties or {}).items(): + self._doc_class._doc_type.mapping.field(name, field) + if dynamic is not None: + self._doc_class._doc_type.mapping.meta("dynamic", dynamic) + + self._mapping: "MappingBase" = deepcopy(self._doc_class._doc_type.mapping) + super().__init__(**kwargs) + + def __getitem__(self, name: str) -> Field: + return self._mapping[name] + + def __contains__(self, name: str) -> bool: + return name in self._mapping + + def _empty(self) -> "InnerDoc": + return self._wrap({}) + + def _wrap(self, data: Dict[str, Any]) -> "InnerDoc": + return self._doc_class.from_es(data, data_only=True) + + def empty(self) -> Union["InnerDoc", AttrList[Any]]: + if self._multi: + return AttrList[Any]([], self._wrap) + return self._empty() + + def to_dict(self) -> Dict[str, Any]: + d = self._mapping.to_dict() + d.update(super().to_dict()) + return d + + def _collect_fields(self) -> Iterator[Field]: + return self._mapping.properties._collect_fields() + + def _deserialize(self, data: Any) -> "InnerDoc": + # don't wrap already wrapped data + if isinstance(data, self._doc_class): + return data + + if isinstance(data, AttrDict): + data = data._d_ + + return self._wrap(data) + + def _serialize( + self, data: Optional[Union[Dict[str, Any], "InnerDoc"]] + ) -> Optional[Dict[str, Any]]: + if data is None: + return None + + # somebody assigned raw dict to the field, we should tolerate that + if isinstance(data, collections.abc.Mapping): + return data + + return data.to_dict() + + def clean(self, data: Any) -> Any: + data = super().clean(data) + if data is None: + return None + if isinstance(data, (list, AttrList)): + for d in cast(Iterator["InnerDoc"], data): + d.full_clean() + else: + data.full_clean() + return data + + def update(self, other: Any, update_only: bool = False) -> None: + if not isinstance(other, Object): + # not an inner/nested object, no merge possible + return + + self._mapping.update(other._mapping, update_only) + + +class Nested(Object): + name = "nested" + + def __init__(self, *args: Any, **kwargs: Any): + kwargs.setdefault("multi", True) + super().__init__(*args, **kwargs) + + +class Date(Field): + name = "date" + _coerce = True + + def __init__( + self, + default_timezone: Optional[Union[str, "tzinfo"]] = None, + *args: Any, + **kwargs: Any, + ): + """ + :arg default_timezone: timezone that will be automatically used for tz-naive values + May be instance of `datetime.tzinfo` or string containing TZ offset + """ + if isinstance(default_timezone, str): + self._default_timezone = tz.gettz(default_timezone) + else: + self._default_timezone = default_timezone + super().__init__(*args, **kwargs) + + def _deserialize(self, data: Any) -> Union[datetime, date]: + if isinstance(data, str): + try: + data = parser.parse(data) + except Exception as e: + raise ValidationException( + f"Could not parse date from the value ({data!r})", e + ) + # we treat the yyyy-MM-dd format as a special case + if hasattr(self, "format") and self.format == "yyyy-MM-dd": + data = data.date() + + if isinstance(data, datetime): + if self._default_timezone and data.tzinfo is None: + data = data.replace(tzinfo=self._default_timezone) + return data + if isinstance(data, date): + return data + if isinstance(data, int): + # Divide by a float to preserve milliseconds on the datetime. 
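+            # e.g. 1546300800123 (epoch milliseconds) becomes
+            # datetime(2019, 1, 1, 0, 0, 0, 123000)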
+ return datetime.utcfromtimestamp(data / 1000.0) + + raise ValidationException(f"Could not parse date from the value ({data!r})") + + +class Text(Field): + _param_defs = { + "fields": {"type": "field", "hash": True}, + "analyzer": {"type": "analyzer"}, + "search_analyzer": {"type": "analyzer"}, + "search_quote_analyzer": {"type": "analyzer"}, + } + name = "text" + + +class SearchAsYouType(Field): + _param_defs = { + "analyzer": {"type": "analyzer"}, + "search_analyzer": {"type": "analyzer"}, + "search_quote_analyzer": {"type": "analyzer"}, + } + name = "search_as_you_type" + + +class Keyword(Field): + _param_defs = { + "fields": {"type": "field", "hash": True}, + "search_analyzer": {"type": "analyzer"}, + "normalizer": {"type": "normalizer"}, + } + name = "keyword" + + +class ConstantKeyword(Keyword): + name = "constant_keyword" + + +class Boolean(Field): + name = "boolean" + _coerce = True + + def _deserialize(self, data: Any) -> bool: + if data == "false": + return False + return bool(data) + + def clean(self, data: Any) -> Optional[bool]: + if data is not None: + data = self.deserialize(data) + if data is None and self._required: + raise ValidationException("Value required for this field.") + return data # type: ignore + + +class Float(Field): + name = "float" + _coerce = True + + def _deserialize(self, data: Any) -> float: + return float(data) + + +class DenseVector(Field): + name = "dense_vector" + _coerce = True + + def __init__(self, **kwargs: Any): + self._element_type = kwargs.get("element_type", "float") + if self._element_type in ["float", "byte"]: + kwargs["multi"] = True + super().__init__(**kwargs) + + def _deserialize(self, data: Any) -> Any: + if self._element_type == "float": + return float(data) + elif self._element_type == "byte": + return int(data) + return data + + +class SparseVector(Field): + name = "sparse_vector" + + +class HalfFloat(Float): + name = "half_float" + + +class ScaledFloat(Float): + name = "scaled_float" + + def __init__(self, scaling_factor: int, *args: Any, **kwargs: Any): + super().__init__(scaling_factor=scaling_factor, *args, **kwargs) + + +class Double(Float): + name = "double" + + +class RankFeature(Float): + name = "rank_feature" + + +class RankFeatures(Field): + name = "rank_features" + + +class Integer(Field): + name = "integer" + _coerce = True + + def _deserialize(self, data: Any) -> int: + return int(data) + + +class Byte(Integer): + name = "byte" + + +class Short(Integer): + name = "short" + + +class Long(Integer): + name = "long" + + +class Ip(Field): + name = "ip" + _coerce = True + + def _deserialize(self, data: Any) -> Union["IPv4Address", "IPv6Address"]: + # the ipaddress library for pypy only accepts unicode. + return ipaddress.ip_address(unicode(data)) + + def _serialize(self, data: Any) -> Optional[str]: + if data is None: + return None + return str(data) + + +class Binary(Field): + name = "binary" + _coerce = True + + def clean(self, data: str) -> str: + # Binary fields are opaque, so there's not much cleaning + # that can be done. 
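+        # base64 decoding and encoding happen in _deserialize/_serialize below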
+ return data + + def _deserialize(self, data: Any) -> bytes: + return base64.b64decode(data) + + def _serialize(self, data: Any) -> Optional[str]: + if data is None: + return None + return base64.b64encode(data).decode() + + +class Point(Field): + name = "point" + + +class Shape(Field): + name = "shape" + + +class GeoPoint(Field): + name = "geo_point" + + +class GeoShape(Field): + name = "geo_shape" + + +class Completion(Field): + _param_defs = { + "analyzer": {"type": "analyzer"}, + "search_analyzer": {"type": "analyzer"}, + } + name = "completion" + + +class Percolator(Field): + name = "percolator" + _coerce = True + + def _deserialize(self, data: Any) -> "Query": + return Q(data) # type: ignore + + def _serialize(self, data: Any) -> Optional[Dict[str, Any]]: + if data is None: + return None + return data.to_dict() # type: ignore + + +class RangeField(Field): + _coerce = True + _core_field: Optional[Field] = None + + def _deserialize(self, data: Any) -> Range["_SupportsComparison"]: + if isinstance(data, Range): + return data + data = {k: self._core_field.deserialize(v) for k, v in data.items()} # type: ignore + return Range(data) + + def _serialize(self, data: Any) -> Optional[Dict[str, Any]]: + if data is None: + return None + if not isinstance(data, collections.abc.Mapping): + data = data.to_dict() + return {k: self._core_field.serialize(v) for k, v in data.items()} # type: ignore + + +class IntegerRange(RangeField): + name = "integer_range" + _core_field = Integer() + + +class FloatRange(RangeField): + name = "float_range" + _core_field = Float() + + +class LongRange(RangeField): + name = "long_range" + _core_field = Long() + + +class DoubleRange(RangeField): + name = "double_range" + _core_field = Double() + + +class DateRange(RangeField): + name = "date_range" + _core_field = Date() + + +class IpRange(Field): + # not a RangeField since ip_range supports CIDR ranges + name = "ip_range" + + +class Join(Field): + name = "join" + + +class TokenCount(Field): + name = "token_count" + + +class Murmur3(Field): + name = "murmur3" + + +class SemanticText(Field): + name = "semantic_text" diff --git a/elasticsearch/dsl/function.py b/elasticsearch/dsl/function.py new file mode 100644 index 000000000..9744e6f8b --- /dev/null +++ b/elasticsearch/dsl/function.py @@ -0,0 +1,180 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import collections.abc +from copy import deepcopy +from typing import ( + Any, + ClassVar, + Dict, + Literal, + MutableMapping, + Optional, + Union, + overload, +) + +from elastic_transport.client_utils import DEFAULT, DefaultType + +from .utils import AttrDict, DslBase + + +@overload +def SF(name_or_sf: MutableMapping[str, Any]) -> "ScoreFunction": ... + + +@overload +def SF(name_or_sf: "ScoreFunction") -> "ScoreFunction": ... 
+ + +@overload +def SF(name_or_sf: str, **params: Any) -> "ScoreFunction": ... + + +def SF( + name_or_sf: Union[str, "ScoreFunction", MutableMapping[str, Any]], + **params: Any, +) -> "ScoreFunction": + # {"script_score": {"script": "_score"}, "filter": {}} + if isinstance(name_or_sf, collections.abc.MutableMapping): + if params: + raise ValueError("SF() cannot accept parameters when passing in a dict.") + + kwargs: Dict[str, Any] = {} + sf = deepcopy(name_or_sf) + for k in ScoreFunction._param_defs: + if k in name_or_sf: + kwargs[k] = sf.pop(k) + + # not sf, so just filter+weight, which used to be boost factor + sf_params = params + if not sf: + name = "boost_factor" + # {'FUNCTION': {...}} + elif len(sf) == 1: + name, sf_params = sf.popitem() + else: + raise ValueError(f"SF() got an unexpected fields in the dictionary: {sf!r}") + + # boost factor special case, see elasticsearch #6343 + if not isinstance(sf_params, collections.abc.Mapping): + sf_params = {"value": sf_params} + + # mix known params (from _param_defs) and from inside the function + kwargs.update(sf_params) + return ScoreFunction.get_dsl_class(name)(**kwargs) + + # ScriptScore(script="_score", filter=Q()) + if isinstance(name_or_sf, ScoreFunction): + if params: + raise ValueError( + "SF() cannot accept parameters when passing in a ScoreFunction object." + ) + return name_or_sf + + # "script_score", script="_score", filter=Q() + return ScoreFunction.get_dsl_class(name_or_sf)(**params) + + +class ScoreFunction(DslBase): + _type_name = "score_function" + _type_shortcut = staticmethod(SF) + _param_defs = { + "query": {"type": "query"}, + "filter": {"type": "query"}, + "weight": {}, + } + name: ClassVar[Optional[str]] = None + + def to_dict(self) -> Dict[str, Any]: + d = super().to_dict() + # filter and query dicts should be at the same level as us + for k in self._param_defs: + if self.name is not None: + val = d[self.name] + if isinstance(val, dict) and k in val: + d[k] = val.pop(k) + return d + + +class ScriptScore(ScoreFunction): + name = "script_score" + + +class BoostFactor(ScoreFunction): + name = "boost_factor" + + def to_dict(self) -> Dict[str, Any]: + d = super().to_dict() + if self.name is not None: + val = d[self.name] + if isinstance(val, dict): + if "value" in val: + d[self.name] = val.pop("value") + else: + del d[self.name] + return d + + +class RandomScore(ScoreFunction): + name = "random_score" + + +class FieldValueFactorScore(ScoreFunction): + name = "field_value_factor" + + +class FieldValueFactor(FieldValueFactorScore): # alias of the above + pass + + +class Linear(ScoreFunction): + name = "linear" + + +class Gauss(ScoreFunction): + name = "gauss" + + +class Exp(ScoreFunction): + name = "exp" + + +class DecayFunction(AttrDict[Any]): + def __init__( + self, + *, + decay: Union[float, "DefaultType"] = DEFAULT, + offset: Any = DEFAULT, + scale: Any = DEFAULT, + origin: Any = DEFAULT, + multi_value_mode: Union[ + Literal["min", "max", "avg", "sum"], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + if decay != DEFAULT: + kwargs["decay"] = decay + if offset != DEFAULT: + kwargs["offset"] = offset + if scale != DEFAULT: + kwargs["scale"] = scale + if origin != DEFAULT: + kwargs["origin"] = origin + if multi_value_mode != DEFAULT: + kwargs["multi_value_mode"] = multi_value_mode + super().__init__(kwargs) diff --git a/elasticsearch/dsl/index.py b/elasticsearch/dsl/index.py new file mode 100644 index 000000000..368e58d42 --- /dev/null +++ b/elasticsearch/dsl/index.py @@ -0,0 +1,23 @@ +# Licensed to Elasticsearch 
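The ``SF`` factory above accepts three call forms. A minimal sketch, with illustrative field names (``likes``, ``pub_date``, ``tags``)::

    from elasticsearch.dsl.function import SF
    from elasticsearch.dsl.query import Q

    # 1. function name plus parameters
    sf1 = SF("field_value_factor", field="likes")

    # 2. a raw dict: "filter" and "weight" are lifted out, the remaining key names the function
    sf2 = SF({"gauss": {"pub_date": {"origin": "now", "scale": "10d"}},
              "filter": {"term": {"published": True}}})

    # 3. a bare weight/filter dict is turned into a boost_factor function
    sf3 = SF({"weight": 2, "filter": Q("term", tags="python")})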
B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ._async.index import ( # noqa: F401 + AsyncComposableIndexTemplate, + AsyncIndex, + AsyncIndexTemplate, +) +from ._sync.index import ComposableIndexTemplate, Index, IndexTemplate # noqa: F401 diff --git a/elasticsearch/dsl/index_base.py b/elasticsearch/dsl/index_base.py new file mode 100644 index 000000000..71ff50339 --- /dev/null +++ b/elasticsearch/dsl/index_base.py @@ -0,0 +1,178 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple + +from typing_extensions import Self + +from . 
import analysis +from .utils import AnyUsingType, merge + +if TYPE_CHECKING: + from .document_base import DocumentMeta + from .field import Field + from .mapping_base import MappingBase + + +class IndexBase: + def __init__(self, name: str, mapping_class: type, using: AnyUsingType = "default"): + """ + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + self._name = name + self._doc_types: List["DocumentMeta"] = [] + self._using = using + self._settings: Dict[str, Any] = {} + self._aliases: Dict[str, Any] = {} + self._analysis: Dict[str, Any] = {} + self._mapping_class = mapping_class + self._mapping: Optional["MappingBase"] = None + + def resolve_nested( + self, field_path: str + ) -> Tuple[List[str], Optional["MappingBase"]]: + for doc in self._doc_types: + nested, field = doc._doc_type.mapping.resolve_nested(field_path) + if field is not None: + return nested, field + if self._mapping: + return self._mapping.resolve_nested(field_path) + return [], None + + def resolve_field(self, field_path: str) -> Optional["Field"]: + for doc in self._doc_types: + field = doc._doc_type.mapping.resolve_field(field_path) + if field is not None: + return field + if self._mapping: + return self._mapping.resolve_field(field_path) + return None + + def get_or_create_mapping(self) -> "MappingBase": + if self._mapping is None: + self._mapping = self._mapping_class() + return self._mapping + + def mapping(self, mapping: "MappingBase") -> None: + """ + Associate a mapping (an instance of + :class:`~elasticsearch.dsl.Mapping`) with this index. + This means that, when this index is created, it will contain the + mappings for the document type defined by those mappings. + """ + self.get_or_create_mapping().update(mapping) + + def document(self, document: "DocumentMeta") -> "DocumentMeta": + """ + Associate a :class:`~elasticsearch.dsl.Document` subclass with an index. + This means that, when this index is created, it will contain the + mappings for the ``Document``. If the ``Document`` class doesn't have a + default index yet (by defining ``class Index``), this instance will be + used. Can be used as a decorator:: + + i = Index('blog') + + @i.document + class Post(Document): + title = Text() + + # create the index, including Post mappings + i.create() + + # .search() will now return a Search object that will return + # properly deserialized Post instances + s = i.search() + """ + self._doc_types.append(document) + + # If the document index does not have any name, that means the user + # did not set any index already to the document. + # So set this index as document index + if document._index._name is None: + document._index = self + + return document + + def settings(self, **kwargs: Any) -> Self: + """ + Add settings to the index:: + + i = Index('i') + i.settings(number_of_shards=1, number_of_replicas=0) + + Multiple calls to ``settings`` will merge the keys, later overriding + the earlier. + """ + self._settings.update(kwargs) + return self + + def aliases(self, **kwargs: Any) -> Self: + """ + Add aliases to the index definition:: + + i = Index('blog-v2') + i.aliases(blog={}, published={'filter': Q('term', published=True)}) + """ + self._aliases.update(kwargs) + return self + + def analyzer(self, *args: Any, **kwargs: Any) -> None: + """ + Explicitly add an analyzer to an index. Note that all custom analyzers + defined in mappings will also be created. This is useful for search analyzers. 
+ + Example:: + + from elasticsearch.dsl import analyzer, tokenizer + + my_analyzer = analyzer('my_analyzer', + tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3), + filter=['lowercase'] + ) + + i = Index('blog') + i.analyzer(my_analyzer) + + """ + analyzer = analysis.analyzer(*args, **kwargs) + d = analyzer.get_analysis_definition() + # empty custom analyzer, probably already defined out of our control + if not d: + return + + # merge the definition + merge(self._analysis, d, True) + + def to_dict(self) -> Dict[str, Any]: + out = {} + if self._settings: + out["settings"] = self._settings + if self._aliases: + out["aliases"] = self._aliases + mappings = self._mapping.to_dict() if self._mapping else {} + analysis = self._mapping._collect_analysis() if self._mapping else {} + for d in self._doc_types: + mapping = d._doc_type.mapping + merge(mappings, mapping.to_dict(), True) + merge(analysis, mapping._collect_analysis(), True) + if mappings: + out["mappings"] = mappings + if analysis or self._analysis: + merge(analysis, self._analysis) + out.setdefault("settings", {})["analysis"] = analysis + return out diff --git a/elasticsearch/dsl/mapping.py b/elasticsearch/dsl/mapping.py new file mode 100644 index 000000000..e39dd0490 --- /dev/null +++ b/elasticsearch/dsl/mapping.py @@ -0,0 +1,19 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from ._async.mapping import AsyncMapping # noqa: F401 +from ._sync.mapping import Mapping # noqa: F401 diff --git a/elasticsearch/dsl/mapping_base.py b/elasticsearch/dsl/mapping_base.py new file mode 100644 index 000000000..658cf6cfc --- /dev/null +++ b/elasticsearch/dsl/mapping_base.py @@ -0,0 +1,219 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
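Continuing the docstring example, a short sketch of how ``to_dict()`` merges the explicit settings with the analysis collected from registered analyzers (names and values are illustrative)::

    from elasticsearch.dsl import Index, analyzer, tokenizer

    i = Index("blog")
    i.settings(number_of_shards=1, number_of_replicas=0)
    i.analyzer(analyzer("trigram_analyzer",
                        tokenizer=tokenizer("trigram", "ngram", min_gram=3, max_gram=3),
                        filter=["lowercase"]))

    body = i.to_dict()
    # body["settings"] now contains both the explicit settings and an
    # "analysis" section built from the analyzer registered above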
+ +import collections.abc +from itertools import chain +from typing import Any, Dict, Iterator, List, Optional, Tuple, cast + +from typing_extensions import Self + +from .field import Field, Nested, Text, construct_field +from .utils import DslBase + +META_FIELDS = frozenset( + ( + "dynamic", + "transform", + "dynamic_date_formats", + "date_detection", + "numeric_detection", + "dynamic_templates", + "enabled", + ) +) + + +class Properties(DslBase): + name = "properties" + _param_defs = {"properties": {"type": "field", "hash": True}} + + properties: Dict[str, Field] + + def __init__(self) -> None: + super().__init__() + + def __repr__(self) -> str: + return "Properties()" + + def __getitem__(self, name: str) -> Field: + return self.properties[name] + + def __contains__(self, name: str) -> bool: + return name in self.properties + + def to_dict(self) -> Dict[str, Any]: + return cast(Dict[str, Field], super().to_dict()["properties"]) + + def field(self, name: str, *args: Any, **kwargs: Any) -> Self: + self.properties[name] = construct_field(*args, **kwargs) + return self + + def _collect_fields(self) -> Iterator[Field]: + """Iterate over all Field objects within, including multi fields.""" + fields = cast(Dict[str, Field], self.properties.to_dict()) # type: ignore + for f in fields.values(): + yield f + # multi fields + if hasattr(f, "fields"): + yield from f.fields.to_dict().values() + # nested and inner objects + if hasattr(f, "_collect_fields"): + yield from f._collect_fields() + + def update(self, other_object: Any) -> None: + if not hasattr(other_object, "properties"): + # not an inner/nested object, no merge possible + return + + our, other = self.properties, other_object.properties + for name in other: + if name in our: + if hasattr(our[name], "update"): + our[name].update(other[name]) + continue + our[name] = other[name] + + +class MappingBase: + def __init__(self) -> None: + self.properties = Properties() + self._meta: Dict[str, Any] = {} + + def __repr__(self) -> str: + return "Mapping()" + + def _clone(self) -> Self: + m = self.__class__() + m.properties._params = self.properties._params.copy() + return m + + def resolve_nested( + self, field_path: str + ) -> Tuple[List[str], Optional["MappingBase"]]: + field = self + nested = [] + parts = field_path.split(".") + for i, step in enumerate(parts): + try: + field = field[step] # type: ignore[assignment] + except KeyError: + return [], None + if isinstance(field, Nested): + nested.append(".".join(parts[: i + 1])) + return nested, field + + def resolve_field(self, field_path: str) -> Optional[Field]: + field = self + for step in field_path.split("."): + try: + field = field[step] # type: ignore[assignment] + except KeyError: + return None + return cast(Field, field) + + def _collect_analysis(self) -> Dict[str, Any]: + analysis: Dict[str, Any] = {} + fields = [] + if "_all" in self._meta: + fields.append(Text(**self._meta["_all"])) + + for f in chain(fields, self.properties._collect_fields()): + for analyzer_name in ( + "analyzer", + "normalizer", + "search_analyzer", + "search_quote_analyzer", + ): + if not hasattr(f, analyzer_name): + continue + analyzer = getattr(f, analyzer_name) + d = analyzer.get_analysis_definition() + # empty custom analyzer, probably already defined out of our control + if not d: + continue + + # merge the definition + # TODO: conflict detection/resolution + for key in d: + analysis.setdefault(key, {}).update(d[key]) + + return analysis + + def _update_from_dict(self, raw: Dict[str, Any]) -> None: + for name, 
definition in raw.get("properties", {}).items(): + self.field(name, definition) + + # metadata like _all etc + for name, value in raw.items(): + if name != "properties": + if isinstance(value, collections.abc.Mapping): + self.meta(name, **value) + else: + self.meta(name, value) + + def update(self, mapping: "MappingBase", update_only: bool = False) -> None: + for name in mapping: + if update_only and name in self: + # nested and inner objects, merge recursively + if hasattr(self[name], "update"): + # FIXME only merge subfields, not the settings + self[name].update(mapping[name], update_only) + continue + self.field(name, mapping[name]) + + if update_only: + for name in mapping._meta: + if name not in self._meta: + self._meta[name] = mapping._meta[name] + else: + self._meta.update(mapping._meta) + + def __contains__(self, name: str) -> bool: + return name in self.properties.properties + + def __getitem__(self, name: str) -> Field: + return self.properties.properties[name] + + def __iter__(self) -> Iterator[str]: + return iter(self.properties.properties) + + def field(self, *args: Any, **kwargs: Any) -> Self: + self.properties.field(*args, **kwargs) + return self + + def meta(self, name: str, params: Any = None, **kwargs: Any) -> Self: + if not name.startswith("_") and name not in META_FIELDS: + name = "_" + name + + if params and kwargs: + raise ValueError("Meta configs cannot have both value and a dictionary.") + + self._meta[name] = kwargs if params is None else params + return self + + def to_dict(self) -> Dict[str, Any]: + meta = self._meta + + # hard coded serialization of analyzers in _all + if "_all" in meta: + meta = meta.copy() + _all = meta["_all"] = meta["_all"].copy() + for f in ("analyzer", "search_analyzer", "search_quote_analyzer"): + if hasattr(_all.get(f, None), "to_dict"): + _all[f] = _all[f].to_dict() + meta.update(self.properties.to_dict()) + return meta diff --git a/elasticsearch/dsl/py.typed b/elasticsearch/dsl/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/elasticsearch/dsl/query.py b/elasticsearch/dsl/query.py new file mode 100644 index 000000000..1b3d9f22b --- /dev/null +++ b/elasticsearch/dsl/query.py @@ -0,0 +1,2795 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import collections.abc +from copy import deepcopy +from itertools import chain +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Dict, + List, + Literal, + Mapping, + MutableMapping, + Optional, + Protocol, + Sequence, + TypeVar, + Union, + cast, + overload, +) + +from elastic_transport.client_utils import DEFAULT + +# 'SF' looks unused but the test suite assumes it's available +# from this module so others are liable to do so as well. 
+from .function import SF # noqa: F401 +from .function import ScoreFunction +from .utils import DslBase + +if TYPE_CHECKING: + from elastic_transport.client_utils import DefaultType + + from . import types, wrappers + + from .document_base import InstrumentedField + +_T = TypeVar("_T") +_M = TypeVar("_M", bound=Mapping[str, Any]) + + +class QProxiedProtocol(Protocol[_T]): + _proxied: _T + + +@overload +def Q(name_or_query: MutableMapping[str, _M]) -> "Query": ... + + +@overload +def Q(name_or_query: "Query") -> "Query": ... + + +@overload +def Q(name_or_query: QProxiedProtocol[_T]) -> _T: ... + + +@overload +def Q(name_or_query: str = "match_all", **params: Any) -> "Query": ... + + +def Q( + name_or_query: Union[ + str, + "Query", + QProxiedProtocol[_T], + MutableMapping[str, _M], + ] = "match_all", + **params: Any, +) -> Union["Query", _T]: + # {"match": {"title": "python"}} + if isinstance(name_or_query, collections.abc.MutableMapping): + if params: + raise ValueError("Q() cannot accept parameters when passing in a dict.") + if len(name_or_query) != 1: + raise ValueError( + 'Q() can only accept dict with a single query ({"match": {...}}). ' + "Instead it got (%r)" % name_or_query + ) + name, q_params = deepcopy(name_or_query).popitem() + return Query.get_dsl_class(name)(_expand__to_dot=False, **q_params) + + # MatchAll() + if isinstance(name_or_query, Query): + if params: + raise ValueError( + "Q() cannot accept parameters when passing in a Query object." + ) + return name_or_query + + # s.query = Q('filtered', query=s.query) + if hasattr(name_or_query, "_proxied"): + return cast(QProxiedProtocol[_T], name_or_query)._proxied + + # "match", title="python" + return Query.get_dsl_class(name_or_query)(**params) + + +class Query(DslBase): + _type_name = "query" + _type_shortcut = staticmethod(Q) + name: ClassVar[Optional[str]] = None + + # Add type annotations for methods not defined in every subclass + __ror__: ClassVar[Callable[["Query", "Query"], "Query"]] + __radd__: ClassVar[Callable[["Query", "Query"], "Query"]] + __rand__: ClassVar[Callable[["Query", "Query"], "Query"]] + + def __add__(self, other: "Query") -> "Query": + # make sure we give queries that know how to combine themselves + # preference + if hasattr(other, "__radd__"): + return other.__radd__(self) + return Bool(must=[self, other]) + + def __invert__(self) -> "Query": + return Bool(must_not=[self]) + + def __or__(self, other: "Query") -> "Query": + # make sure we give queries that know how to combine themselves + # preference + if hasattr(other, "__ror__"): + return other.__ror__(self) + return Bool(should=[self, other]) + + def __and__(self, other: "Query") -> "Query": + # make sure we give queries that know how to combine themselves + # preference + if hasattr(other, "__rand__"): + return other.__rand__(self) + return Bool(must=[self, other]) + + +class Bool(Query): + """ + matches documents matching boolean combinations of other queries. + + :arg filter: The clause (query) must appear in matching documents. + However, unlike `must`, the score of the query will be ignored. + :arg minimum_should_match: Specifies the number or percentage of + `should` clauses returned documents must match. + :arg must: The clause (query) must appear in matching documents and + will contribute to the score. + :arg must_not: The clause (query) must not appear in the matching + documents. Because scoring is ignored, a score of `0` is returned + for all documents. + :arg should: The clause (query) should appear in the matching + document. 
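A minimal sketch of the accepted ``Q()`` call forms and the operator overloads defined above (field names are illustrative)::

    from elasticsearch.dsl.query import Q

    q1 = Q("match", title="python")               # name plus parameters
    q2 = Q({"match": {"title": "python"}})        # single-key dict form
    assert q1 == q2                               # queries compare by serialized form

    q3 = q1 & Q("term", published=True)           # Bool(must=[...])
    q4 = ~Q("term", tags="deprecated")            # Bool(must_not=[...])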
+ :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "bool" + _param_defs = { + "filter": {"type": "query", "multi": True}, + "must": {"type": "query", "multi": True}, + "must_not": {"type": "query", "multi": True}, + "should": {"type": "query", "multi": True}, + } + + def __init__( + self, + *, + filter: Union[Query, Sequence[Query], "DefaultType"] = DEFAULT, + minimum_should_match: Union[int, str, "DefaultType"] = DEFAULT, + must: Union[Query, Sequence[Query], "DefaultType"] = DEFAULT, + must_not: Union[Query, Sequence[Query], "DefaultType"] = DEFAULT, + should: Union[Query, Sequence[Query], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + filter=filter, + minimum_should_match=minimum_should_match, + must=must, + must_not=must_not, + should=should, + boost=boost, + _name=_name, + **kwargs, + ) + + def __add__(self, other: Query) -> "Bool": + q = self._clone() + if isinstance(other, Bool): + q.must += other.must + q.should += other.should + q.must_not += other.must_not + q.filter += other.filter + else: + q.must.append(other) + return q + + __radd__ = __add__ + + def __or__(self, other: Query) -> Query: + for q in (self, other): + if isinstance(q, Bool) and not any( + (q.must, q.must_not, q.filter, getattr(q, "minimum_should_match", None)) + ): + other = self if q is other else other + q = q._clone() + if isinstance(other, Bool) and not any( + ( + other.must, + other.must_not, + other.filter, + getattr(other, "minimum_should_match", None), + ) + ): + q.should.extend(other.should) + else: + q.should.append(other) + return q + + return Bool(should=[self, other]) + + __ror__ = __or__ + + @property + def _min_should_match(self) -> int: + return getattr( + self, + "minimum_should_match", + 0 if not self.should or (self.must or self.filter) else 1, + ) + + def __invert__(self) -> Query: + # Because an empty Bool query is treated like + # MatchAll the inverse should be MatchNone + if not any(chain(self.must, self.filter, self.should, self.must_not)): + return MatchNone() + + negations: List[Query] = [] + for q in chain(self.must, self.filter): + negations.append(~q) + + for q in self.must_not: + negations.append(q) + + if self.should and self._min_should_match: + negations.append(Bool(must_not=self.should[:])) + + if len(negations) == 1: + return negations[0] + return Bool(should=negations) + + def __and__(self, other: Query) -> Query: + q = self._clone() + if isinstance(other, Bool): + q.must += other.must + q.must_not += other.must_not + q.filter += other.filter + q.should = [] + + # reset minimum_should_match as it will get calculated below + if "minimum_should_match" in q._params: + del q._params["minimum_should_match"] + + for qx in (self, other): + min_should_match = qx._min_should_match + # TODO: percentages or negative numbers will fail here + # for now we report an error + if not isinstance(min_should_match, int) or min_should_match < 0: + raise ValueError( + "Can only combine queries with positive integer values for minimum_should_match" + ) + # all subqueries are required + if len(qx.should) <= min_should_match: + q.must.extend(qx.should) + # not all of them are required, use it and 
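The combination rules above can be summarised with a small sketch; the serialized forms in the comments are abbreviated::

    from elasticsearch.dsl.query import Q

    a = Q("match", title="python")
    b = Q("match", title="django")

    (a | b).to_dict()     # {"bool": {"should":   [match(python), match(django)]}}
    (a & b).to_dict()     # {"bool": {"must":     [match(python), match(django)]}}
    (~(a | b)).to_dict()  # {"bool": {"must_not": [match(python), match(django)]}}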
remember min_should_match + elif not q.should: + q.minimum_should_match = min_should_match + q.should = qx.should + # all queries are optional, just extend should + elif q._min_should_match == 0 and min_should_match == 0: + q.should.extend(qx.should) + # not all are required, add a should list to the must with proper min_should_match + else: + q.must.append( + Bool(should=qx.should, minimum_should_match=min_should_match) + ) + else: + if not (q.must or q.filter) and q.should: + q._params.setdefault("minimum_should_match", 1) + q.must.append(other) + return q + + __rand__ = __and__ + + +class Boosting(Query): + """ + Returns documents matching a `positive` query while reducing the + relevance score of documents that also match a `negative` query. + + :arg negative_boost: (required) Floating point number between 0 and + 1.0 used to decrease the relevance scores of documents matching + the `negative` query. + :arg negative: (required) Query used to decrease the relevance score + of matching documents. + :arg positive: (required) Any returned documents must match this + query. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "boosting" + _param_defs = { + "negative": {"type": "query"}, + "positive": {"type": "query"}, + } + + def __init__( + self, + *, + negative_boost: Union[float, "DefaultType"] = DEFAULT, + negative: Union[Query, "DefaultType"] = DEFAULT, + positive: Union[Query, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + negative_boost=negative_boost, + negative=negative, + positive=positive, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Common(Query): + """ + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "common" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.CommonTermsQuery", Dict[str, Any], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class CombinedFields(Query): + """ + The `combined_fields` query supports searching multiple text fields as + if their contents had been indexed into one combined field. + + :arg fields: (required) List of fields to search. Field wildcard + patterns are allowed. Only `text` fields are supported, and they + must all have the same search `analyzer`. + :arg query: (required) Text to search for in the provided `fields`. + The `combined_fields` query analyzes the provided text before + performing a search. + :arg auto_generate_synonyms_phrase_query: If true, match phrase + queries are automatically created for multi-term synonyms. + Defaults to `True` if omitted. + :arg operator: Boolean logic used to interpret text in the query + value. Defaults to `or` if omitted. + :arg minimum_should_match: Minimum number of clauses that must match + for a document to be returned. + :arg zero_terms_query: Indicates whether no documents are returned if + the analyzer removes all tokens, such as when using a `stop` + filter. Defaults to `none` if omitted. 
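A minimal usage sketch for the ``boosting`` query defined above, with illustrative field names::

    from elasticsearch.dsl.query import Q

    q = Q(
        "boosting",
        positive=Q("match", title="python"),
        negative=Q("term", tags="outdated"),
        negative_boost=0.5,
    )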
+ :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "combined_fields" + + def __init__( + self, + *, + fields: Union[ + Sequence[Union[str, "InstrumentedField"]], "DefaultType" + ] = DEFAULT, + query: Union[str, "DefaultType"] = DEFAULT, + auto_generate_synonyms_phrase_query: Union[bool, "DefaultType"] = DEFAULT, + operator: Union[Literal["or", "and"], "DefaultType"] = DEFAULT, + minimum_should_match: Union[int, str, "DefaultType"] = DEFAULT, + zero_terms_query: Union[Literal["none", "all"], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + fields=fields, + query=query, + auto_generate_synonyms_phrase_query=auto_generate_synonyms_phrase_query, + operator=operator, + minimum_should_match=minimum_should_match, + zero_terms_query=zero_terms_query, + boost=boost, + _name=_name, + **kwargs, + ) + + +class ConstantScore(Query): + """ + Wraps a filter query and returns every matching document with a + relevance score equal to the `boost` parameter value. + + :arg filter: (required) Filter query you wish to run. Any returned + documents must match this query. Filter queries do not calculate + relevance scores. To speed up performance, Elasticsearch + automatically caches frequently used filter queries. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "constant_score" + _param_defs = { + "filter": {"type": "query"}, + } + + def __init__( + self, + *, + filter: Union[Query, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(filter=filter, boost=boost, _name=_name, **kwargs) + + +class DisMax(Query): + """ + Returns documents matching one or more wrapped queries, called query + clauses or clauses. If a returned document matches multiple query + clauses, the `dis_max` query assigns the document the highest + relevance score from any matching clause, plus a tie breaking + increment for any additional matching subqueries. + + :arg queries: (required) One or more query clauses. Returned documents + must match one or more of these queries. If a document matches + multiple queries, Elasticsearch uses the highest relevance score. + :arg tie_breaker: Floating point number between 0 and 1.0 used to + increase the relevance scores of documents matching multiple query + clauses. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "dis_max" + _param_defs = { + "queries": {"type": "query", "multi": True}, + } + + def __init__( + self, + *, + queries: Union[Sequence[Query], "DefaultType"] = DEFAULT, + tie_breaker: Union[float, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + queries=queries, tie_breaker=tie_breaker, boost=boost, _name=_name, **kwargs + ) + + +class DistanceFeature(Query): + """ + Boosts the relevance score of documents closer to a provided origin + date or point. For example, you can use this query to give more weight + to documents closer to a certain date or location. + + :arg origin: (required) Date or point of origin used to calculate + distances. If the `field` value is a `date` or `date_nanos` field, + the `origin` value must be a date. Date Math, such as `now-1h`, is + supported. If the field value is a `geo_point` field, the `origin` + value must be a geopoint. + :arg pivot: (required) Distance from the `origin` at which relevance + scores receive half of the `boost` value. If the `field` value is + a `date` or `date_nanos` field, the `pivot` value must be a time + unit, such as `1h` or `10d`. If the `field` value is a `geo_point` + field, the `pivot` value must be a distance unit, such as `1km` or + `12m`. + :arg field: (required) Name of the field used to calculate distances. + This field must meet the following criteria: be a `date`, + `date_nanos` or `geo_point` field; have an `index` mapping + parameter value of `true`, which is the default; have an + `doc_values` mapping parameter value of `true`, which is the + default. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "distance_feature" + + def __init__( + self, + *, + origin: Any = DEFAULT, + pivot: Any = DEFAULT, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + origin=origin, pivot=pivot, field=field, boost=boost, _name=_name, **kwargs + ) + + +class Exists(Query): + """ + Returns documents that contain an indexed value for a field. + + :arg field: (required) Name of the field you wish to search. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "exists" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(field=field, boost=boost, _name=_name, **kwargs) + + +class FunctionScore(Query): + """ + The `function_score` enables you to modify the score of documents that + are retrieved by a query. + + :arg boost_mode: Defines how he newly computed score is combined with + the score of the query Defaults to `multiply` if omitted. 
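Usage sketches for ``dis_max`` and ``exists``, with illustrative field names::

    from elasticsearch.dsl.query import Q

    q = Q(
        "dis_max",
        queries=[Q("match", title="python"), Q("match", body="python")],
        tie_breaker=0.3,
    )

    missing_price = ~Q("exists", field="price")   # documents without a "price" value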
+ :arg functions: One or more functions that compute a new score for + each document returned by the query. + :arg max_boost: Restricts the new score to not exceed the provided + limit. + :arg min_score: Excludes documents that do not meet the provided score + threshold. + :arg query: A query that determines the documents for which a new + score is computed. + :arg score_mode: Specifies how the computed scores are combined + Defaults to `multiply` if omitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "function_score" + _param_defs = { + "functions": {"type": "score_function", "multi": True}, + "query": {"type": "query"}, + "filter": {"type": "query"}, + } + + def __init__( + self, + *, + boost_mode: Union[ + Literal["multiply", "replace", "sum", "avg", "max", "min"], "DefaultType" + ] = DEFAULT, + functions: Union[Sequence[ScoreFunction], "DefaultType"] = DEFAULT, + max_boost: Union[float, "DefaultType"] = DEFAULT, + min_score: Union[float, "DefaultType"] = DEFAULT, + query: Union[Query, "DefaultType"] = DEFAULT, + score_mode: Union[ + Literal["multiply", "sum", "avg", "first", "max", "min"], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if functions is DEFAULT: + functions = [] + for name in ScoreFunction._classes: + if name in kwargs: + functions.append({name: kwargs.pop(name)}) # type: ignore + super().__init__( + boost_mode=boost_mode, + functions=functions, + max_boost=max_boost, + min_score=min_score, + query=query, + score_mode=score_mode, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Fuzzy(Query): + """ + Returns documents that contain terms similar to the search term, as + measured by a Levenshtein edit distance. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "fuzzy" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.FuzzyQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class GeoBoundingBox(Query): + """ + Matches geo_point and geo_shape values that intersect a bounding box. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + :arg type: + :arg validation_method: Set to `IGNORE_MALFORMED` to accept geo points + with invalid latitude or longitude. Set to `COERCE` to also try to + infer correct latitude or longitude. Defaults to `'strict'` if + omitted. + :arg ignore_unmapped: Set to `true` to ignore an unmapped field and + not match any documents for this query. Set to `false` to throw an + exception if the field is not mapped. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
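A sketch combining ``function_score`` with the score functions from ``function.py``; field names and decay parameters are illustrative::

    from elasticsearch.dsl.function import SF
    from elasticsearch.dsl.query import Q

    q = Q(
        "function_score",
        query=Q("match", title="python"),
        functions=[
            SF("field_value_factor", field="likes"),
            SF("gauss", pub_date={"origin": "now", "scale": "10d"}),
        ],
        score_mode="sum",
        boost_mode="multiply",
    )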
+ :arg _name: + """ + + name = "geo_bounding_box" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.CoordsGeoBounds", + "types.TopLeftBottomRightGeoBounds", + "types.TopRightBottomLeftGeoBounds", + "types.WktGeoBounds", + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + *, + type: Union[Literal["memory", "indexed"], "DefaultType"] = DEFAULT, + validation_method: Union[ + Literal["coerce", "ignore_malformed", "strict"], "DefaultType" + ] = DEFAULT, + ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__( + type=type, + validation_method=validation_method, + ignore_unmapped=ignore_unmapped, + boost=boost, + _name=_name, + **kwargs, + ) + + +class GeoDistance(Query): + """ + Matches `geo_point` and `geo_shape` values within a given distance of + a geopoint. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + :arg distance: (required) The radius of the circle centred on the + specified location. Points which fall into this circle are + considered to be matches. + :arg distance_type: How to compute the distance. Set to `plane` for a + faster calculation that's inaccurate on long distances and close + to the poles. Defaults to `'arc'` if omitted. + :arg validation_method: Set to `IGNORE_MALFORMED` to accept geo points + with invalid latitude or longitude. Set to `COERCE` to also try to + infer correct latitude or longitude. Defaults to `'strict'` if + omitted. + :arg ignore_unmapped: Set to `true` to ignore an unmapped field and + not match any documents for this query. Set to `false` to throw an + exception if the field is not mapped. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "geo_distance" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.LatLonGeoLocation", + "types.GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + *, + distance: Union[str, "DefaultType"] = DEFAULT, + distance_type: Union[Literal["arc", "plane"], "DefaultType"] = DEFAULT, + validation_method: Union[ + Literal["coerce", "ignore_malformed", "strict"], "DefaultType" + ] = DEFAULT, + ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__( + distance=distance, + distance_type=distance_type, + validation_method=validation_method, + ignore_unmapped=ignore_unmapped, + boost=boost, + _name=_name, + **kwargs, + ) + + +class GeoPolygon(Query): + """ + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + :arg validation_method: Defaults to `'strict'` if omitted. + :arg ignore_unmapped: + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. 
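A usage sketch for ``geo_distance``; the ``pin.location`` field is illustrative, and the double underscore in the keyword argument is the usual DSL shortcut for a dotted field name::

    from elasticsearch.dsl.query import Q

    q = Q(
        "geo_distance",
        distance="10km",
        distance_type="arc",
        pin__location={"lat": 40.71, "lon": -74.0},   # serialized as "pin.location"
    )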
A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "geo_polygon" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.GeoPolygonPoints", Dict[str, Any], "DefaultType" + ] = DEFAULT, + *, + validation_method: Union[ + Literal["coerce", "ignore_malformed", "strict"], "DefaultType" + ] = DEFAULT, + ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__( + validation_method=validation_method, + ignore_unmapped=ignore_unmapped, + boost=boost, + _name=_name, + **kwargs, + ) + + +class GeoShape(Query): + """ + Filter documents indexed using either the `geo_shape` or the + `geo_point` type. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + :arg ignore_unmapped: Set to `true` to ignore an unmapped field and + not match any documents for this query. Set to `false` to throw an + exception if the field is not mapped. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "geo_shape" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.GeoShapeFieldQuery", Dict[str, Any], "DefaultType" + ] = DEFAULT, + *, + ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__( + ignore_unmapped=ignore_unmapped, boost=boost, _name=_name, **kwargs + ) + + +class HasChild(Query): + """ + Returns parent documents whose joined child documents match a provided + query. + + :arg query: (required) Query you wish to run on child documents of the + `type` field. If a child document matches the search, the query + returns the parent document. + :arg type: (required) Name of the child relationship mapped for the + `join` field. + :arg ignore_unmapped: Indicates whether to ignore an unmapped `type` + and not return any documents instead of an error. + :arg inner_hits: If defined, each search hit will contain inner hits. + :arg max_children: Maximum number of child documents that match the + query allowed for a returned parent document. If the parent + document exceeds this limit, it is excluded from the search + results. + :arg min_children: Minimum number of child documents that match the + query required to match the query for a returned parent document. + If the parent document does not meet this limit, it is excluded + from the search results. + :arg score_mode: Indicates how scores for matching child documents + affect the root parent document’s relevance score. Defaults to + `'none'` if omitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. 
A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "has_child" + _param_defs = { + "query": {"type": "query"}, + } + + def __init__( + self, + *, + query: Union[Query, "DefaultType"] = DEFAULT, + type: Union[str, "DefaultType"] = DEFAULT, + ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT, + inner_hits: Union["types.InnerHits", Dict[str, Any], "DefaultType"] = DEFAULT, + max_children: Union[int, "DefaultType"] = DEFAULT, + min_children: Union[int, "DefaultType"] = DEFAULT, + score_mode: Union[ + Literal["none", "avg", "sum", "max", "min"], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + query=query, + type=type, + ignore_unmapped=ignore_unmapped, + inner_hits=inner_hits, + max_children=max_children, + min_children=min_children, + score_mode=score_mode, + boost=boost, + _name=_name, + **kwargs, + ) + + +class HasParent(Query): + """ + Returns child documents whose joined parent document matches a + provided query. + + :arg parent_type: (required) Name of the parent relationship mapped + for the `join` field. + :arg query: (required) Query you wish to run on parent documents of + the `parent_type` field. If a parent document matches the search, + the query returns its child documents. + :arg ignore_unmapped: Indicates whether to ignore an unmapped + `parent_type` and not return any documents instead of an error. + You can use this parameter to query multiple indices that may not + contain the `parent_type`. + :arg inner_hits: If defined, each search hit will contain inner hits. + :arg score: Indicates whether the relevance score of a matching parent + document is aggregated into its child documents. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "has_parent" + _param_defs = { + "query": {"type": "query"}, + } + + def __init__( + self, + *, + parent_type: Union[str, "DefaultType"] = DEFAULT, + query: Union[Query, "DefaultType"] = DEFAULT, + ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT, + inner_hits: Union["types.InnerHits", Dict[str, Any], "DefaultType"] = DEFAULT, + score: Union[bool, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + parent_type=parent_type, + query=query, + ignore_unmapped=ignore_unmapped, + inner_hits=inner_hits, + score=score, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Ids(Query): + """ + Returns documents based on their IDs. This query uses document IDs + stored in the `_id` field. + + :arg values: An array of document IDs. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
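A usage sketch for ``has_child``; the ``answer`` relation and the ``body`` field are illustrative::

    from elasticsearch.dsl.query import Q

    q = Q(
        "has_child",
        type="answer",
        query=Q("match", body="elasticsearch"),
        score_mode="max",
        min_children=1,
    )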
+ :arg _name: + """ + + name = "ids" + + def __init__( + self, + *, + values: Union[str, Sequence[str], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(values=values, boost=boost, _name=_name, **kwargs) + + +class Intervals(Query): + """ + Returns documents based on the order and proximity of matching terms. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "intervals" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.IntervalsQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class Knn(Query): + """ + Finds the k nearest vectors to a query vector, as measured by a + similarity metric. knn query finds nearest vectors through approximate + search on indexed dense_vectors. + + :arg field: (required) The name of the vector field to search against + :arg query_vector: The query vector + :arg query_vector_builder: The query vector builder. You must provide + a query_vector_builder or query_vector, but not both. + :arg num_candidates: The number of nearest neighbor candidates to + consider per shard + :arg k: The final number of nearest neighbors to return as top hits + :arg filter: Filters for the kNN search query + :arg similarity: The minimum similarity for a vector to be considered + a match + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "knn" + _param_defs = { + "filter": {"type": "query", "multi": True}, + } + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + query_vector: Union[Sequence[float], "DefaultType"] = DEFAULT, + query_vector_builder: Union[ + "types.QueryVectorBuilder", Dict[str, Any], "DefaultType" + ] = DEFAULT, + num_candidates: Union[int, "DefaultType"] = DEFAULT, + k: Union[int, "DefaultType"] = DEFAULT, + filter: Union[Query, Sequence[Query], "DefaultType"] = DEFAULT, + similarity: Union[float, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + query_vector=query_vector, + query_vector_builder=query_vector_builder, + num_candidates=num_candidates, + k=k, + filter=filter, + similarity=similarity, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Match(Query): + """ + Returns documents that match a provided text, number, date or boolean + value. The provided text is analyzed before matching. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "match" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.MatchQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class MatchAll(Query): + """ + Matches all documents, giving them all a `_score` of 1.0. 
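A usage sketch for the ``knn`` query; the field name and vector values are illustrative::

    from elasticsearch.dsl.query import Q

    q = Q(
        "knn",
        field="embedding",                   # illustrative dense_vector field
        query_vector=[0.12, -0.53, 0.98],    # illustrative query vector
        k=10,
        num_candidates=100,
        filter=Q("term", category="books"),
    )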
+ + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "match_all" + + def __init__( + self, + *, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(boost=boost, _name=_name, **kwargs) + + def __add__(self, other: "Query") -> "Query": + return other._clone() + + __and__ = __rand__ = __radd__ = __add__ + + def __or__(self, other: "Query") -> "MatchAll": + return self + + __ror__ = __or__ + + def __invert__(self) -> "MatchNone": + return MatchNone() + + +EMPTY_QUERY = MatchAll() + + +class MatchBoolPrefix(Query): + """ + Analyzes its input and constructs a `bool` query from the terms. Each + term except the last is used in a `term` query. The last term is used + in a prefix query. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "match_bool_prefix" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.MatchBoolPrefixQuery", Dict[str, Any], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class MatchNone(Query): + """ + Matches no documents. + + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "match_none" + + def __init__( + self, + *, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(boost=boost, _name=_name, **kwargs) + + def __add__(self, other: "Query") -> "MatchNone": + return self + + __and__ = __rand__ = __radd__ = __add__ + + def __or__(self, other: "Query") -> "Query": + return other._clone() + + __ror__ = __or__ + + def __invert__(self) -> MatchAll: + return MatchAll() + + +class MatchPhrase(Query): + """ + Analyzes the text and creates a phrase query out of the analyzed text. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "match_phrase" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.MatchPhraseQuery", Dict[str, Any], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class MatchPhrasePrefix(Query): + """ + Returns documents that contain the words of a provided text, in the + same order as provided. The last term of the provided text is treated + as a prefix, matching any words that begin with that term. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. 
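The identities implemented by ``MatchAll`` and ``MatchNone`` can be checked with a short sketch (queries compare equal by their serialized form)::

    from elasticsearch.dsl.query import MatchAll, MatchNone, Q

    q = Q("match", title="python")

    assert (MatchAll() & q) == q       # MatchAll is the identity for &
    assert (MatchNone() | q) == q      # MatchNone is the identity for |
    assert ~MatchAll() == MatchNone()
    assert ~MatchNone() == MatchAll()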
+ """ + + name = "match_phrase_prefix" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.MatchPhrasePrefixQuery", Dict[str, Any], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class MoreLikeThis(Query): + """ + Returns documents that are "like" a given set of documents. + + :arg like: (required) Specifies free form text and/or a single or + multiple documents for which you want to find similar documents. + :arg analyzer: The analyzer that is used to analyze the free form + text. Defaults to the analyzer associated with the first field in + fields. + :arg boost_terms: Each term in the formed query could be further + boosted by their tf-idf score. This sets the boost factor to use + when using this feature. Defaults to deactivated (0). + :arg fail_on_unsupported_field: Controls whether the query should fail + (throw an exception) if any of the specified fields are not of the + supported types (`text` or `keyword`). Defaults to `True` if + omitted. + :arg fields: A list of fields to fetch and analyze the text from. + Defaults to the `index.query.default_field` index setting, which + has a default value of `*`. + :arg include: Specifies whether the input documents should also be + included in the search results returned. + :arg max_doc_freq: The maximum document frequency above which the + terms are ignored from the input document. + :arg max_query_terms: The maximum number of query terms that can be + selected. Defaults to `25` if omitted. + :arg max_word_length: The maximum word length above which the terms + are ignored. Defaults to unbounded (`0`). + :arg min_doc_freq: The minimum document frequency below which the + terms are ignored from the input document. Defaults to `5` if + omitted. + :arg minimum_should_match: After the disjunctive query has been + formed, this parameter controls the number of terms that must + match. + :arg min_term_freq: The minimum term frequency below which the terms + are ignored from the input document. Defaults to `2` if omitted. + :arg min_word_length: The minimum word length below which the terms + are ignored. + :arg routing: + :arg stop_words: An array of stop words. Any word in this set is + ignored. + :arg unlike: Used in combination with `like` to exclude documents that + match a set of terms. + :arg version: + :arg version_type: Defaults to `'internal'` if omitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "more_like_this" + + def __init__( + self, + *, + like: Union[ + Union[str, "types.LikeDocument"], + Sequence[Union[str, "types.LikeDocument"]], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + analyzer: Union[str, "DefaultType"] = DEFAULT, + boost_terms: Union[float, "DefaultType"] = DEFAULT, + fail_on_unsupported_field: Union[bool, "DefaultType"] = DEFAULT, + fields: Union[ + Sequence[Union[str, "InstrumentedField"]], "DefaultType" + ] = DEFAULT, + include: Union[bool, "DefaultType"] = DEFAULT, + max_doc_freq: Union[int, "DefaultType"] = DEFAULT, + max_query_terms: Union[int, "DefaultType"] = DEFAULT, + max_word_length: Union[int, "DefaultType"] = DEFAULT, + min_doc_freq: Union[int, "DefaultType"] = DEFAULT, + minimum_should_match: Union[int, str, "DefaultType"] = DEFAULT, + min_term_freq: Union[int, "DefaultType"] = DEFAULT, + min_word_length: Union[int, "DefaultType"] = DEFAULT, + routing: Union[str, "DefaultType"] = DEFAULT, + stop_words: Union[str, Sequence[str], "DefaultType"] = DEFAULT, + unlike: Union[ + Union[str, "types.LikeDocument"], + Sequence[Union[str, "types.LikeDocument"]], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + version: Union[int, "DefaultType"] = DEFAULT, + version_type: Union[ + Literal["internal", "external", "external_gte", "force"], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + like=like, + analyzer=analyzer, + boost_terms=boost_terms, + fail_on_unsupported_field=fail_on_unsupported_field, + fields=fields, + include=include, + max_doc_freq=max_doc_freq, + max_query_terms=max_query_terms, + max_word_length=max_word_length, + min_doc_freq=min_doc_freq, + minimum_should_match=minimum_should_match, + min_term_freq=min_term_freq, + min_word_length=min_word_length, + routing=routing, + stop_words=stop_words, + unlike=unlike, + version=version, + version_type=version_type, + boost=boost, + _name=_name, + **kwargs, + ) + + +class MultiMatch(Query): + """ + Enables you to search for a provided text, number, date or boolean + value across multiple fields. The provided text is analyzed before + matching. + + :arg query: (required) Text, number, boolean value or date you wish to + find in the provided field. + :arg analyzer: Analyzer used to convert the text in the query value + into tokens. + :arg auto_generate_synonyms_phrase_query: If `true`, match phrase + queries are automatically created for multi-term synonyms. + Defaults to `True` if omitted. + :arg cutoff_frequency: + :arg fields: The fields to be queried. Defaults to the + `index.query.default_field` index settings, which in turn defaults + to `*`. + :arg fuzziness: Maximum edit distance allowed for matching. + :arg fuzzy_rewrite: Method used to rewrite the query. + :arg fuzzy_transpositions: If `true`, edits for fuzzy matching include + transpositions of two adjacent characters (for example, `ab` to + `ba`). Can be applied to the term subqueries constructed for all + terms but the final term. Defaults to `True` if omitted. + :arg lenient: If `true`, format-based errors, such as providing a text + query value for a numeric field, are ignored. + :arg max_expansions: Maximum number of terms to which the query will + expand. Defaults to `50` if omitted. + :arg minimum_should_match: Minimum number of clauses that must match + for a document to be returned. + :arg operator: Boolean logic used to interpret text in the query + value. 
Defaults to `'or'` if omitted. + :arg prefix_length: Number of beginning characters left unchanged for + fuzzy matching. + :arg slop: Maximum number of positions allowed between matching + tokens. + :arg tie_breaker: Determines how scores for each per-term blended + query and scores across groups are combined. + :arg type: How `the` multi_match query is executed internally. + Defaults to `'best_fields'` if omitted. + :arg zero_terms_query: Indicates whether no documents are returned if + the `analyzer` removes all tokens, such as when using a `stop` + filter. Defaults to `'none'` if omitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "multi_match" + + def __init__( + self, + *, + query: Union[str, "DefaultType"] = DEFAULT, + analyzer: Union[str, "DefaultType"] = DEFAULT, + auto_generate_synonyms_phrase_query: Union[bool, "DefaultType"] = DEFAULT, + cutoff_frequency: Union[float, "DefaultType"] = DEFAULT, + fields: Union[ + Union[str, "InstrumentedField"], + Sequence[Union[str, "InstrumentedField"]], + "DefaultType", + ] = DEFAULT, + fuzziness: Union[str, int, "DefaultType"] = DEFAULT, + fuzzy_rewrite: Union[str, "DefaultType"] = DEFAULT, + fuzzy_transpositions: Union[bool, "DefaultType"] = DEFAULT, + lenient: Union[bool, "DefaultType"] = DEFAULT, + max_expansions: Union[int, "DefaultType"] = DEFAULT, + minimum_should_match: Union[int, str, "DefaultType"] = DEFAULT, + operator: Union[Literal["and", "or"], "DefaultType"] = DEFAULT, + prefix_length: Union[int, "DefaultType"] = DEFAULT, + slop: Union[int, "DefaultType"] = DEFAULT, + tie_breaker: Union[float, "DefaultType"] = DEFAULT, + type: Union[ + Literal[ + "best_fields", + "most_fields", + "cross_fields", + "phrase", + "phrase_prefix", + "bool_prefix", + ], + "DefaultType", + ] = DEFAULT, + zero_terms_query: Union[Literal["all", "none"], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + query=query, + analyzer=analyzer, + auto_generate_synonyms_phrase_query=auto_generate_synonyms_phrase_query, + cutoff_frequency=cutoff_frequency, + fields=fields, + fuzziness=fuzziness, + fuzzy_rewrite=fuzzy_rewrite, + fuzzy_transpositions=fuzzy_transpositions, + lenient=lenient, + max_expansions=max_expansions, + minimum_should_match=minimum_should_match, + operator=operator, + prefix_length=prefix_length, + slop=slop, + tie_breaker=tie_breaker, + type=type, + zero_terms_query=zero_terms_query, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Nested(Query): + """ + Wraps another query to search nested fields. If an object matches the + search, the nested query returns the root parent document. + + :arg path: (required) Path to the nested object you wish to search. + :arg query: (required) Query you wish to run on nested objects in the + path. + :arg ignore_unmapped: Indicates whether to ignore an unmapped path and + not return any documents instead of an error. + :arg inner_hits: If defined, each search hit will contain inner hits. + :arg score_mode: How scores for matching child objects affect the root + parent document’s relevance score. Defaults to `'avg'` if omitted. 
+ :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "nested" + _param_defs = { + "query": {"type": "query"}, + } + + def __init__( + self, + *, + path: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + query: Union[Query, "DefaultType"] = DEFAULT, + ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT, + inner_hits: Union["types.InnerHits", Dict[str, Any], "DefaultType"] = DEFAULT, + score_mode: Union[ + Literal["none", "avg", "sum", "max", "min"], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + path=path, + query=query, + ignore_unmapped=ignore_unmapped, + inner_hits=inner_hits, + score_mode=score_mode, + boost=boost, + _name=_name, + **kwargs, + ) + + +class ParentId(Query): + """ + Returns child documents joined to a specific parent document. + + :arg id: ID of the parent document. + :arg ignore_unmapped: Indicates whether to ignore an unmapped `type` + and not return any documents instead of an error. + :arg type: Name of the child relationship mapped for the `join` field. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "parent_id" + + def __init__( + self, + *, + id: Union[str, "DefaultType"] = DEFAULT, + ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT, + type: Union[str, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + id=id, + ignore_unmapped=ignore_unmapped, + type=type, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Percolate(Query): + """ + Matches queries stored in an index. + + :arg field: (required) Field that holds the indexed queries. The field + must use the `percolator` mapping type. + :arg document: The source of the document being percolated. + :arg documents: An array of sources of the documents being percolated. + :arg id: The ID of a stored document to percolate. + :arg index: The index of a stored document to percolate. + :arg name: The suffix used for the `_percolator_document_slot` field + when multiple `percolate` queries are specified. + :arg preference: Preference used to fetch document to percolate. + :arg routing: Routing used to fetch document to percolate. + :arg version: The expected version of a stored document to percolate. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "percolate" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + document: Any = DEFAULT, + documents: Union[Sequence[Any], "DefaultType"] = DEFAULT, + id: Union[str, "DefaultType"] = DEFAULT, + index: Union[str, "DefaultType"] = DEFAULT, + name: Union[str, "DefaultType"] = DEFAULT, + preference: Union[str, "DefaultType"] = DEFAULT, + routing: Union[str, "DefaultType"] = DEFAULT, + version: Union[int, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + document=document, + documents=documents, + id=id, + index=index, + name=name, + preference=preference, + routing=routing, + version=version, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Pinned(Query): + """ + Promotes selected documents to rank higher than those matching a given + query. + + :arg organic: (required) Any choice of query used to rank documents + which will be ranked below the "pinned" documents. + :arg ids: Document IDs listed in the order they are to appear in + results. Required if `docs` is not specified. + :arg docs: Documents listed in the order they are to appear in + results. Required if `ids` is not specified. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "pinned" + _param_defs = { + "organic": {"type": "query"}, + } + + def __init__( + self, + *, + organic: Union[Query, "DefaultType"] = DEFAULT, + ids: Union[Sequence[str], "DefaultType"] = DEFAULT, + docs: Union[ + Sequence["types.PinnedDoc"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + organic=organic, ids=ids, docs=docs, boost=boost, _name=_name, **kwargs + ) + + +class Prefix(Query): + """ + Returns documents that contain a specific prefix in a provided field. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "prefix" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.PrefixQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class QueryString(Query): + """ + Returns documents based on a provided query string, using a parser + with a strict syntax. + + :arg query: (required) Query string you wish to parse and use for + search. + :arg allow_leading_wildcard: If `true`, the wildcard characters `*` + and `?` are allowed as the first character of the query string. + Defaults to `True` if omitted. + :arg analyzer: Analyzer used to convert text in the query string into + tokens. + :arg analyze_wildcard: If `true`, the query attempts to analyze + wildcard terms in the query string. + :arg auto_generate_synonyms_phrase_query: If `true`, match phrase + queries are automatically created for multi-term synonyms. + Defaults to `True` if omitted. + :arg default_field: Default field to search if no field is provided in + the query string. Supports wildcards (`*`). 
Defaults to the + `index.query.default_field` index setting, which has a default + value of `*`. + :arg default_operator: Default boolean logic used to interpret text in + the query string if no operators are specified. Defaults to `'or'` + if omitted. + :arg enable_position_increments: If `true`, enable position increments + in queries constructed from a `query_string` search. Defaults to + `True` if omitted. + :arg escape: + :arg fields: Array of fields to search. Supports wildcards (`*`). + :arg fuzziness: Maximum edit distance allowed for fuzzy matching. + :arg fuzzy_max_expansions: Maximum number of terms to which the query + expands for fuzzy matching. Defaults to `50` if omitted. + :arg fuzzy_prefix_length: Number of beginning characters left + unchanged for fuzzy matching. + :arg fuzzy_rewrite: Method used to rewrite the query. + :arg fuzzy_transpositions: If `true`, edits for fuzzy matching include + transpositions of two adjacent characters (for example, `ab` to + `ba`). Defaults to `True` if omitted. + :arg lenient: If `true`, format-based errors, such as providing a text + value for a numeric field, are ignored. + :arg max_determinized_states: Maximum number of automaton states + required for the query. Defaults to `10000` if omitted. + :arg minimum_should_match: Minimum number of clauses that must match + for a document to be returned. + :arg phrase_slop: Maximum number of positions allowed between matching + tokens for phrases. + :arg quote_analyzer: Analyzer used to convert quoted text in the query + string into tokens. For quoted text, this parameter overrides the + analyzer specified in the `analyzer` parameter. + :arg quote_field_suffix: Suffix appended to quoted text in the query + string. You can use this suffix to use a different analysis method + for exact matches. + :arg rewrite: Method used to rewrite the query. + :arg tie_breaker: How to combine the queries generated from the + individual search terms in the resulting `dis_max` query. + :arg time_zone: Coordinated Universal Time (UTC) offset or IANA time + zone used to convert date values in the query string to UTC. + :arg type: Determines how the query matches and scores documents. + Defaults to `'best_fields'` if omitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "query_string" + + def __init__( + self, + *, + query: Union[str, "DefaultType"] = DEFAULT, + allow_leading_wildcard: Union[bool, "DefaultType"] = DEFAULT, + analyzer: Union[str, "DefaultType"] = DEFAULT, + analyze_wildcard: Union[bool, "DefaultType"] = DEFAULT, + auto_generate_synonyms_phrase_query: Union[bool, "DefaultType"] = DEFAULT, + default_field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + default_operator: Union[Literal["and", "or"], "DefaultType"] = DEFAULT, + enable_position_increments: Union[bool, "DefaultType"] = DEFAULT, + escape: Union[bool, "DefaultType"] = DEFAULT, + fields: Union[ + Sequence[Union[str, "InstrumentedField"]], "DefaultType" + ] = DEFAULT, + fuzziness: Union[str, int, "DefaultType"] = DEFAULT, + fuzzy_max_expansions: Union[int, "DefaultType"] = DEFAULT, + fuzzy_prefix_length: Union[int, "DefaultType"] = DEFAULT, + fuzzy_rewrite: Union[str, "DefaultType"] = DEFAULT, + fuzzy_transpositions: Union[bool, "DefaultType"] = DEFAULT, + lenient: Union[bool, "DefaultType"] = DEFAULT, + max_determinized_states: Union[int, "DefaultType"] = DEFAULT, + minimum_should_match: Union[int, str, "DefaultType"] = DEFAULT, + phrase_slop: Union[float, "DefaultType"] = DEFAULT, + quote_analyzer: Union[str, "DefaultType"] = DEFAULT, + quote_field_suffix: Union[str, "DefaultType"] = DEFAULT, + rewrite: Union[str, "DefaultType"] = DEFAULT, + tie_breaker: Union[float, "DefaultType"] = DEFAULT, + time_zone: Union[str, "DefaultType"] = DEFAULT, + type: Union[ + Literal[ + "best_fields", + "most_fields", + "cross_fields", + "phrase", + "phrase_prefix", + "bool_prefix", + ], + "DefaultType", + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + query=query, + allow_leading_wildcard=allow_leading_wildcard, + analyzer=analyzer, + analyze_wildcard=analyze_wildcard, + auto_generate_synonyms_phrase_query=auto_generate_synonyms_phrase_query, + default_field=default_field, + default_operator=default_operator, + enable_position_increments=enable_position_increments, + escape=escape, + fields=fields, + fuzziness=fuzziness, + fuzzy_max_expansions=fuzzy_max_expansions, + fuzzy_prefix_length=fuzzy_prefix_length, + fuzzy_rewrite=fuzzy_rewrite, + fuzzy_transpositions=fuzzy_transpositions, + lenient=lenient, + max_determinized_states=max_determinized_states, + minimum_should_match=minimum_should_match, + phrase_slop=phrase_slop, + quote_analyzer=quote_analyzer, + quote_field_suffix=quote_field_suffix, + rewrite=rewrite, + tie_breaker=tie_breaker, + time_zone=time_zone, + type=type, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Range(Query): + """ + Returns documents that contain terms within a provided range. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "range" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["wrappers.Range[Any]", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class RankFeature(Query): + """ + Boosts the relevance score of documents based on the numeric value of + a `rank_feature` or `rank_features` field. + + :arg field: (required) `rank_feature` or `rank_features` field used to + boost relevance scores. 
+ :arg saturation: Saturation function used to boost relevance scores + based on the value of the rank feature `field`. + :arg log: Logarithmic function used to boost relevance scores based on + the value of the rank feature `field`. + :arg linear: Linear function used to boost relevance scores based on + the value of the rank feature `field`. + :arg sigmoid: Sigmoid function used to boost relevance scores based on + the value of the rank feature `field`. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "rank_feature" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + saturation: Union[ + "types.RankFeatureFunctionSaturation", Dict[str, Any], "DefaultType" + ] = DEFAULT, + log: Union[ + "types.RankFeatureFunctionLogarithm", Dict[str, Any], "DefaultType" + ] = DEFAULT, + linear: Union[ + "types.RankFeatureFunctionLinear", Dict[str, Any], "DefaultType" + ] = DEFAULT, + sigmoid: Union[ + "types.RankFeatureFunctionSigmoid", Dict[str, Any], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + saturation=saturation, + log=log, + linear=linear, + sigmoid=sigmoid, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Regexp(Query): + """ + Returns documents that contain terms matching a regular expression. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "regexp" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.RegexpQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class Rule(Query): + """ + :arg organic: (required) + :arg ruleset_ids: (required) + :arg match_criteria: (required) + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "rule" + _param_defs = { + "organic": {"type": "query"}, + } + + def __init__( + self, + *, + organic: Union[Query, "DefaultType"] = DEFAULT, + ruleset_ids: Union[Sequence[str], "DefaultType"] = DEFAULT, + match_criteria: Any = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + organic=organic, + ruleset_ids=ruleset_ids, + match_criteria=match_criteria, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Script(Query): + """ + Filters documents based on a provided script. The script query is + typically used in a filter context. + + :arg script: (required) Contains a script to run as a query. This + script must return a boolean value, `true` or `false`. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. 
A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "script" + + def __init__( + self, + *, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(script=script, boost=boost, _name=_name, **kwargs) + + +class ScriptScore(Query): + """ + Uses a script to provide a custom score for returned documents. + + :arg query: (required) Query used to return documents. + :arg script: (required) Script used to compute the score of documents + returned by the query. Important: final relevance scores from the + `script_score` query cannot be negative. + :arg min_score: Documents with a score lower than this floating point + number are excluded from the search results. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "script_score" + _param_defs = { + "query": {"type": "query"}, + } + + def __init__( + self, + *, + query: Union[Query, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + min_score: Union[float, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + query=query, + script=script, + min_score=min_score, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Semantic(Query): + """ + A semantic query to semantic_text field types + + :arg field: (required) The field to query, which must be a + semantic_text field type + :arg query: (required) The query text + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "semantic" + + def __init__( + self, + *, + field: Union[str, "DefaultType"] = DEFAULT, + query: Union[str, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(field=field, query=query, boost=boost, _name=_name, **kwargs) + + +class Shape(Query): + """ + Queries documents that contain fields indexed using the `shape` type. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + :arg ignore_unmapped: When set to `true` the query ignores an unmapped + field and will not match any documents. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "shape" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.ShapeFieldQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + *, + ignore_unmapped: Union[bool, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__( + ignore_unmapped=ignore_unmapped, boost=boost, _name=_name, **kwargs + ) + + +class SimpleQueryString(Query): + """ + Returns documents based on a provided query string, using a parser + with a limited but fault-tolerant syntax. + + :arg query: (required) Query string in the simple query string syntax + you wish to parse and use for search. + :arg analyzer: Analyzer used to convert text in the query string into + tokens. + :arg analyze_wildcard: If `true`, the query attempts to analyze + wildcard terms in the query string. + :arg auto_generate_synonyms_phrase_query: If `true`, the parser + creates a match_phrase query for each multi-position token. + Defaults to `True` if omitted. + :arg default_operator: Default boolean logic used to interpret text in + the query string if no operators are specified. Defaults to `'or'` + if omitted. + :arg fields: Array of fields you wish to search. Accepts wildcard + expressions. You also can boost relevance scores for matches to + particular fields using a caret (`^`) notation. Defaults to the + `index.query.default_field index` setting, which has a default + value of `*`. + :arg flags: List of enabled operators for the simple query string + syntax. Defaults to `ALL` if omitted. + :arg fuzzy_max_expansions: Maximum number of terms to which the query + expands for fuzzy matching. Defaults to `50` if omitted. + :arg fuzzy_prefix_length: Number of beginning characters left + unchanged for fuzzy matching. + :arg fuzzy_transpositions: If `true`, edits for fuzzy matching include + transpositions of two adjacent characters (for example, `ab` to + `ba`). + :arg lenient: If `true`, format-based errors, such as providing a text + value for a numeric field, are ignored. + :arg minimum_should_match: Minimum number of clauses that must match + for a document to be returned. + :arg quote_field_suffix: Suffix appended to quoted text in the query + string. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "simple_query_string" + + def __init__( + self, + *, + query: Union[str, "DefaultType"] = DEFAULT, + analyzer: Union[str, "DefaultType"] = DEFAULT, + analyze_wildcard: Union[bool, "DefaultType"] = DEFAULT, + auto_generate_synonyms_phrase_query: Union[bool, "DefaultType"] = DEFAULT, + default_operator: Union[Literal["and", "or"], "DefaultType"] = DEFAULT, + fields: Union[ + Sequence[Union[str, "InstrumentedField"]], "DefaultType" + ] = DEFAULT, + flags: Union[ + "types.PipeSeparatedFlags", Dict[str, Any], "DefaultType" + ] = DEFAULT, + fuzzy_max_expansions: Union[int, "DefaultType"] = DEFAULT, + fuzzy_prefix_length: Union[int, "DefaultType"] = DEFAULT, + fuzzy_transpositions: Union[bool, "DefaultType"] = DEFAULT, + lenient: Union[bool, "DefaultType"] = DEFAULT, + minimum_should_match: Union[int, str, "DefaultType"] = DEFAULT, + quote_field_suffix: Union[str, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + query=query, + analyzer=analyzer, + analyze_wildcard=analyze_wildcard, + auto_generate_synonyms_phrase_query=auto_generate_synonyms_phrase_query, + default_operator=default_operator, + fields=fields, + flags=flags, + fuzzy_max_expansions=fuzzy_max_expansions, + fuzzy_prefix_length=fuzzy_prefix_length, + fuzzy_transpositions=fuzzy_transpositions, + lenient=lenient, + minimum_should_match=minimum_should_match, + quote_field_suffix=quote_field_suffix, + boost=boost, + _name=_name, + **kwargs, + ) + + +class SpanContaining(Query): + """ + Returns matches which enclose another span query. + + :arg big: (required) Can be any span query. Matching spans from `big` + that contain matches from `little` are returned. + :arg little: (required) Can be any span query. Matching spans from + `big` that contain matches from `little` are returned. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_containing" + + def __init__( + self, + *, + big: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + little: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(big=big, little=little, boost=boost, _name=_name, **kwargs) + + +class SpanFieldMasking(Query): + """ + Wrapper to allow span queries to participate in composite single-field + span queries by _lying_ about their search field. + + :arg field: (required) + :arg query: (required) + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "span_field_masking" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + query: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(field=field, query=query, boost=boost, _name=_name, **kwargs) + + +class SpanFirst(Query): + """ + Matches spans near the beginning of a field. + + :arg end: (required) Controls the maximum end position permitted in a + match. + :arg match: (required) Can be any other span type query. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_first" + + def __init__( + self, + *, + end: Union[int, "DefaultType"] = DEFAULT, + match: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(end=end, match=match, boost=boost, _name=_name, **kwargs) + + +class SpanMulti(Query): + """ + Allows you to wrap a multi term query (one of `wildcard`, `fuzzy`, + `prefix`, `range`, or `regexp` query) as a `span` query, so it can be + nested. + + :arg match: (required) Should be a multi term query (one of + `wildcard`, `fuzzy`, `prefix`, `range`, or `regexp` query). + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_multi" + _param_defs = { + "match": {"type": "query"}, + } + + def __init__( + self, + *, + match: Union[Query, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(match=match, boost=boost, _name=_name, **kwargs) + + +class SpanNear(Query): + """ + Matches spans which are near one another. You can specify `slop`, the + maximum number of intervening unmatched positions, as well as whether + matches are required to be in-order. + + :arg clauses: (required) Array of one or more other span type queries. + :arg in_order: Controls whether matches are required to be in-order. + :arg slop: Controls the maximum number of intervening unmatched + positions permitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "span_near" + + def __init__( + self, + *, + clauses: Union[ + Sequence["types.SpanQuery"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + in_order: Union[bool, "DefaultType"] = DEFAULT, + slop: Union[int, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + clauses=clauses, + in_order=in_order, + slop=slop, + boost=boost, + _name=_name, + **kwargs, + ) + + +class SpanNot(Query): + """ + Removes matches which overlap with another span query or which are + within x tokens before (controlled by the parameter `pre`) or y tokens + after (controlled by the parameter `post`) another span query. + + :arg exclude: (required) Span query whose matches must not overlap + those returned. + :arg include: (required) Span query whose matches are filtered. + :arg dist: The number of tokens from within the include span that + can’t have overlap with the exclude span. Equivalent to setting + both `pre` and `post`. + :arg post: The number of tokens after the include span that can’t have + overlap with the exclude span. + :arg pre: The number of tokens before the include span that can’t have + overlap with the exclude span. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_not" + + def __init__( + self, + *, + exclude: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + include: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + dist: Union[int, "DefaultType"] = DEFAULT, + post: Union[int, "DefaultType"] = DEFAULT, + pre: Union[int, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + exclude=exclude, + include=include, + dist=dist, + post=post, + pre=pre, + boost=boost, + _name=_name, + **kwargs, + ) + + +class SpanOr(Query): + """ + Matches the union of its span clauses. + + :arg clauses: (required) Array of one or more other span type queries. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_or" + + def __init__( + self, + *, + clauses: Union[ + Sequence["types.SpanQuery"], Sequence[Dict[str, Any]], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(clauses=clauses, boost=boost, _name=_name, **kwargs) + + +class SpanTerm(Query): + """ + Matches spans containing a term. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. 
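+
+    For example, a minimal sketch, assuming a hypothetical ``title`` field::
+
+        SpanTerm(_field="title", _value={"value": "quick"})
+
+    Span term queries are usually combined inside compound span queries such
+    as ``span_near`` or ``span_or`` rather than used on their own.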
+ """ + + name = "span_term" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.SpanTermQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class SpanWithin(Query): + """ + Returns matches which are enclosed inside another span query. + + :arg big: (required) Can be any span query. Matching spans from + `little` that are enclosed within `big` are returned. + :arg little: (required) Can be any span query. Matching spans from + `little` that are enclosed within `big` are returned. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "span_within" + + def __init__( + self, + *, + big: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + little: Union["types.SpanQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(big=big, little=little, boost=boost, _name=_name, **kwargs) + + +class SparseVector(Query): + """ + Using input query vectors or a natural language processing model to + convert a query into a list of token-weight pairs, queries against a + sparse vector field. + + :arg field: (required) The name of the field that contains the token- + weight pairs to be searched against. This field must be a mapped + sparse_vector field. + :arg query_vector: Dictionary of precomputed sparse vectors and their + associated weights. Only one of inference_id or query_vector may + be supplied in a request. + :arg inference_id: The inference ID to use to convert the query text + into token-weight pairs. It must be the same inference ID that was + used to create the tokens from the input text. Only one of + inference_id and query_vector is allowed. If inference_id is + specified, query must also be specified. Only one of inference_id + or query_vector may be supplied in a request. + :arg query: The query text you want to use for search. If inference_id + is specified, query must also be specified. + :arg prune: Whether to perform pruning, omitting the non-significant + tokens from the query to improve query performance. If prune is + true but the pruning_config is not specified, pruning will occur + but default values will be used. Default: false + :arg pruning_config: Optional pruning configuration. If enabled, this + will omit non-significant tokens from the query in order to + improve query performance. This is only used if prune is set to + true. If prune is set to true but pruning_config is not specified, + default values will be used. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "sparse_vector" + + def __init__( + self, + *, + field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + query_vector: Union[Mapping[str, float], "DefaultType"] = DEFAULT, + inference_id: Union[str, "DefaultType"] = DEFAULT, + query: Union[str, "DefaultType"] = DEFAULT, + prune: Union[bool, "DefaultType"] = DEFAULT, + pruning_config: Union[ + "types.TokenPruningConfig", Dict[str, Any], "DefaultType" + ] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__( + field=field, + query_vector=query_vector, + inference_id=inference_id, + query=query, + prune=prune, + pruning_config=pruning_config, + boost=boost, + _name=_name, + **kwargs, + ) + + +class Term(Query): + """ + Returns documents that contain an exact term in a provided field. To + return a document, the query term must exactly match the queried + field's value, including whitespace and capitalization. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "term" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.TermQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class Terms(Query): + """ + Returns documents that contain one or more exact terms in a provided + field. To return a document, one or more terms must exactly match a + field value, including whitespace and capitalization. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "terms" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + Sequence[Union[int, float, str, bool, None, Any]], + "types.TermsLookup", + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + *, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(boost=boost, _name=_name, **kwargs) + + def _setattr(self, name: str, value: Any) -> None: + # here we convert any iterables that are not strings to lists + if hasattr(value, "__iter__") and not isinstance(value, (str, list, dict)): + value = list(value) + super()._setattr(name, value) + + +class TermsSet(Query): + """ + Returns documents that contain a minimum number of exact terms in a + provided field. To return a document, a required number of terms must + exactly match the field values, including whitespace and + capitalization. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. 
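+
+    For example, a minimal sketch in which ``programming_languages`` and the
+    ``required_matches`` field holding the number of required matches are
+    both hypothetical::
+
+        TermsSet(
+            _field="programming_languages",
+            _value={
+                "terms": ["python", "java", "go"],
+                "minimum_should_match_field": "required_matches",
+            },
+        )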
+ """ + + name = "terms_set" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.TermsSetQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class TextExpansion(Query): + """ + Uses a natural language processing model to convert the query text + into a list of token-weight pairs which are then used in a query + against a sparse vector or rank features field. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "text_expansion" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.TextExpansionQuery", Dict[str, Any], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class WeightedTokens(Query): + """ + Supports returning text_expansion query results by sending in + precomputed tokens with the query. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "weighted_tokens" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + "types.WeightedTokensQuery", Dict[str, Any], "DefaultType" + ] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class Wildcard(Query): + """ + Returns documents that contain terms matching a wildcard pattern. + + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + """ + + name = "wildcard" + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["types.WildcardQuery", Dict[str, Any], "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + super().__init__(**kwargs) + + +class Wrapper(Query): + """ + A query that accepts any other query as base64 encoded string. + + :arg query: (required) A base64 encoded query. The binary data format + can be any of JSON, YAML, CBOR or SMILE encodings + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + name = "wrapper" + + def __init__( + self, + *, + query: Union[str, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(query=query, boost=boost, _name=_name, **kwargs) + + +class Type(Query): + """ + :arg value: (required) + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + name = "type" + + def __init__( + self, + *, + value: Union[str, "DefaultType"] = DEFAULT, + boost: Union[float, "DefaultType"] = DEFAULT, + _name: Union[str, "DefaultType"] = DEFAULT, + **kwargs: Any, + ): + super().__init__(value=value, boost=boost, _name=_name, **kwargs) diff --git a/elasticsearch/dsl/response/__init__.py b/elasticsearch/dsl/response/__init__.py new file mode 100644 index 000000000..eea1b87f9 --- /dev/null +++ b/elasticsearch/dsl/response/__init__.py @@ -0,0 +1,354 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Generic, + Iterator, + List, + Mapping, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +from ..utils import _R, AttrDict, AttrList, _wrap +from .hit import Hit, HitMeta + +if TYPE_CHECKING: + from .. import types + from ..aggs import Agg + from ..faceted_search_base import FacetedSearchBase + from ..search_base import Request, SearchBase + from ..update_by_query_base import UpdateByQueryBase + +__all__ = [ + "Response", + "AggResponse", + "UpdateByQueryResponse", + "Hit", + "HitMeta", + "AggregateResponseType", +] + + +class Response(AttrDict[Any], Generic[_R]): + """An Elasticsearch search response. 
+
+    :arg took: (required)
+    :arg timed_out: (required)
+    :arg _shards: (required)
+    :arg hits: search results
+    :arg aggregations: aggregation results
+    :arg _clusters:
+    :arg fields:
+    :arg max_score:
+    :arg num_reduce_phases:
+    :arg profile:
+    :arg pit_id:
+    :arg _scroll_id:
+    :arg suggest:
+    :arg terminated_early:
+    """
+
+    _search: "SearchBase[_R]"
+    _faceted_search: "FacetedSearchBase[_R]"
+    _doc_class: Optional[_R]
+    _hits: List[_R]
+
+    took: int
+    timed_out: bool
+    _shards: "types.ShardStatistics"
+    _clusters: "types.ClusterStatistics"
+    fields: Mapping[str, Any]
+    max_score: float
+    num_reduce_phases: int
+    profile: "types.Profile"
+    pit_id: str
+    _scroll_id: str
+    suggest: Mapping[
+        str,
+        Sequence[
+            Union["types.CompletionSuggest", "types.PhraseSuggest", "types.TermSuggest"]
+        ],
+    ]
+    terminated_early: bool
+
+    def __init__(
+        self,
+        search: "Request[_R]",
+        response: Dict[str, Any],
+        doc_class: Optional[_R] = None,
+    ):
+        super(AttrDict, self).__setattr__("_search", search)
+        super(AttrDict, self).__setattr__("_doc_class", doc_class)
+        super().__init__(response)
+
+    def __iter__(self) -> Iterator[_R]:  # type: ignore[override]
+        return iter(self.hits)
+
+    def __getitem__(self, key: Union[slice, int, str]) -> Any:
+        if isinstance(key, (slice, int)):
+            # for slicing etc
+            return self.hits[key]
+        return super().__getitem__(key)
+
+    def __nonzero__(self) -> bool:
+        return bool(self.hits)
+
+    __bool__ = __nonzero__
+
+    def __repr__(self) -> str:
+        return "<Response: %r>" % (self.hits or self.aggregations)
+
+    def __len__(self) -> int:
+        return len(self.hits)
+
+    def __getstate__(self) -> Tuple[Dict[str, Any], "Request[_R]", Optional[_R]]:  # type: ignore[override]
+        return self._d_, self._search, self._doc_class
+
+    def __setstate__(
+        self, state: Tuple[Dict[str, Any], "Request[_R]", Optional[_R]]  # type: ignore[override]
+    ) -> None:
+        super(AttrDict, self).__setattr__("_d_", state[0])
+        super(AttrDict, self).__setattr__("_search", state[1])
+        super(AttrDict, self).__setattr__("_doc_class", state[2])
+
+    def success(self) -> bool:
+        return self._shards.total == self._shards.successful and not self.timed_out
+
+    @property
+    def hits(self) -> List[_R]:
+        if not hasattr(self, "_hits"):
+            h = cast(AttrDict[Any], self._d_["hits"])
+
+            try:
+                hits = AttrList(list(map(self._search._get_result, h["hits"])))
+            except AttributeError as e:
+                # avoid raising AttributeError since it will be hidden by the property
+                raise TypeError("Could not parse hits.", e)
+
+            # avoid assigning _hits into self._d_
+            super(AttrDict, self).__setattr__("_hits", hits)
+            for k in h:
+                setattr(self._hits, k, _wrap(h[k]))
+        return self._hits
+
+    @property
+    def aggregations(self) -> "AggResponse[_R]":
+        return self.aggs
+
+    @property
+    def aggs(self) -> "AggResponse[_R]":
+        if not hasattr(self, "_aggs"):
+            aggs = AggResponse[_R](
+                cast("Agg[_R]", self._search.aggs),
+                self._search,
+                cast(Dict[str, Any], self._d_.get("aggregations", {})),
+            )
+
+            # avoid assigning _aggs into self._d_
+            super(AttrDict, self).__setattr__("_aggs", aggs)
+        return cast("AggResponse[_R]", self._aggs)
+
+    def search_after(self) -> "SearchBase[_R]":
+        """
+        Return a ``Search`` instance that retrieves the next page of results.
+
+        This method provides an easy way to paginate a long list of results using
+        the ``search_after`` option. 
For example:: + + page_size = 20 + s = Search()[:page_size].sort("date") + + while True: + # get a page of results + r = await s.execute() + + # do something with this page of results + + # exit the loop if we reached the end + if len(r.hits) < page_size: + break + + # get a search object with the next page of results + s = r.search_after() + + Note that the ``search_after`` option requires the search to have an + explicit ``sort`` order. + """ + if len(self.hits) == 0: + raise ValueError("Cannot use search_after when there are no search results") + if not hasattr(self.hits[-1].meta, "sort"): # type: ignore + raise ValueError("Cannot use search_after when results are not sorted") + return self._search.extra(search_after=self.hits[-1].meta.sort) # type: ignore + + +AggregateResponseType = Union[ + "types.CardinalityAggregate", + "types.HdrPercentilesAggregate", + "types.HdrPercentileRanksAggregate", + "types.TDigestPercentilesAggregate", + "types.TDigestPercentileRanksAggregate", + "types.PercentilesBucketAggregate", + "types.MedianAbsoluteDeviationAggregate", + "types.MinAggregate", + "types.MaxAggregate", + "types.SumAggregate", + "types.AvgAggregate", + "types.WeightedAvgAggregate", + "types.ValueCountAggregate", + "types.SimpleValueAggregate", + "types.DerivativeAggregate", + "types.BucketMetricValueAggregate", + "types.StatsAggregate", + "types.StatsBucketAggregate", + "types.ExtendedStatsAggregate", + "types.ExtendedStatsBucketAggregate", + "types.GeoBoundsAggregate", + "types.GeoCentroidAggregate", + "types.HistogramAggregate", + "types.DateHistogramAggregate", + "types.AutoDateHistogramAggregate", + "types.VariableWidthHistogramAggregate", + "types.StringTermsAggregate", + "types.LongTermsAggregate", + "types.DoubleTermsAggregate", + "types.UnmappedTermsAggregate", + "types.LongRareTermsAggregate", + "types.StringRareTermsAggregate", + "types.UnmappedRareTermsAggregate", + "types.MultiTermsAggregate", + "types.MissingAggregate", + "types.NestedAggregate", + "types.ReverseNestedAggregate", + "types.GlobalAggregate", + "types.FilterAggregate", + "types.ChildrenAggregate", + "types.ParentAggregate", + "types.SamplerAggregate", + "types.UnmappedSamplerAggregate", + "types.GeoHashGridAggregate", + "types.GeoTileGridAggregate", + "types.GeoHexGridAggregate", + "types.RangeAggregate", + "types.DateRangeAggregate", + "types.GeoDistanceAggregate", + "types.IpRangeAggregate", + "types.IpPrefixAggregate", + "types.FiltersAggregate", + "types.AdjacencyMatrixAggregate", + "types.SignificantLongTermsAggregate", + "types.SignificantStringTermsAggregate", + "types.UnmappedSignificantTermsAggregate", + "types.CompositeAggregate", + "types.FrequentItemSetsAggregate", + "types.TimeSeriesAggregate", + "types.ScriptedMetricAggregate", + "types.TopHitsAggregate", + "types.InferenceAggregate", + "types.StringStatsAggregate", + "types.BoxPlotAggregate", + "types.TopMetricsAggregate", + "types.TTestAggregate", + "types.RateAggregate", + "types.CumulativeCardinalityAggregate", + "types.MatrixStatsAggregate", + "types.GeoLineAggregate", +] + + +class AggResponse(AttrDict[Any], Generic[_R]): + """An Elasticsearch aggregation response.""" + + _meta: Dict[str, Any] + + def __init__(self, aggs: "Agg[_R]", search: "Request[_R]", data: Dict[str, Any]): + super(AttrDict, self).__setattr__("_meta", {"search": search, "aggs": aggs}) + super().__init__(data) + + def __getitem__(self, attr_name: str) -> AggregateResponseType: + if attr_name in self._meta["aggs"]: + # don't do self._meta['aggs'][attr_name] to avoid 
copying + agg = self._meta["aggs"].aggs[attr_name] + return cast( + AggregateResponseType, + agg.result(self._meta["search"], self._d_[attr_name]), + ) + return super().__getitem__(attr_name) # type: ignore + + def __iter__(self) -> Iterator[AggregateResponseType]: # type: ignore[override] + for name in self._meta["aggs"]: + yield self[name] + + +class UpdateByQueryResponse(AttrDict[Any], Generic[_R]): + """An Elasticsearch update by query response. + + :arg batches: + :arg failures: + :arg noops: + :arg deleted: + :arg requests_per_second: + :arg retries: + :arg task: + :arg timed_out: + :arg took: + :arg total: + :arg updated: + :arg version_conflicts: + :arg throttled: + :arg throttled_millis: + :arg throttled_until: + :arg throttled_until_millis: + """ + + _search: "UpdateByQueryBase[_R]" + + batches: int + failures: Sequence["types.BulkIndexByScrollFailure"] + noops: int + deleted: int + requests_per_second: float + retries: "types.Retries" + task: Union[str, int] + timed_out: bool + took: Any + total: int + updated: int + version_conflicts: int + throttled: Any + throttled_millis: Any + throttled_until: Any + throttled_until_millis: Any + + def __init__( + self, + search: "Request[_R]", + response: Dict[str, Any], + doc_class: Optional[_R] = None, + ): + super(AttrDict, self).__setattr__("_search", search) + super(AttrDict, self).__setattr__("_doc_class", doc_class) + super().__init__(response) + + def success(self) -> bool: + return not self.timed_out and not self.failures diff --git a/elasticsearch/dsl/response/aggs.py b/elasticsearch/dsl/response/aggs.py new file mode 100644 index 000000000..3525e1f92 --- /dev/null +++ b/elasticsearch/dsl/response/aggs.py @@ -0,0 +1,100 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union, cast + +from ..utils import _R, AttrDict, AttrList +from . 
import AggResponse, Response + +if TYPE_CHECKING: + from ..aggs import Agg + from ..field import Field + from ..search_base import SearchBase + + +class Bucket(AggResponse[_R]): + def __init__( + self, + aggs: "Agg[_R]", + search: "SearchBase[_R]", + data: Dict[str, Any], + field: Optional["Field"] = None, + ): + super().__init__(aggs, search, data) + + +class FieldBucket(Bucket[_R]): + def __init__( + self, + aggs: "Agg[_R]", + search: "SearchBase[_R]", + data: Dict[str, Any], + field: Optional["Field"] = None, + ): + if field: + data["key"] = field.deserialize(data["key"]) + super().__init__(aggs, search, data, field) + + +class BucketData(AggResponse[_R]): + _bucket_class = Bucket + _buckets: Union[AttrDict[Any], AttrList[Any]] + + def _wrap_bucket(self, data: Dict[str, Any]) -> Bucket[_R]: + return self._bucket_class( + self._meta["aggs"], + self._meta["search"], + data, + field=self._meta.get("field"), + ) + + def __iter__(self) -> Iterator["Agg"]: # type: ignore[override] + return iter(self.buckets) # type: ignore + + def __len__(self) -> int: + return len(self.buckets) + + def __getitem__(self, key: Any) -> Any: + if isinstance(key, (int, slice)): + return cast(AttrList[Any], self.buckets)[key] + return super().__getitem__(key) + + @property + def buckets(self) -> Union[AttrDict[Any], AttrList[Any]]: + if not hasattr(self, "_buckets"): + field = getattr(self._meta["aggs"], "field", None) + if field: + self._meta["field"] = self._meta["search"]._resolve_field(field) + bs = cast(Union[Dict[str, Any], List[Any]], self._d_["buckets"]) + if isinstance(bs, list): + ret = AttrList(bs, obj_wrapper=self._wrap_bucket) + else: + ret = AttrDict[Any]({k: self._wrap_bucket(bs[k]) for k in bs}) # type: ignore + super(AttrDict, self).__setattr__("_buckets", ret) + return self._buckets + + +class FieldBucketData(BucketData[_R]): + _bucket_class = FieldBucket + + +class TopHitsData(Response[_R]): + def __init__(self, agg: "Agg[_R]", search: "SearchBase[_R]", data: Any): + super(AttrDict, self).__setattr__( + "meta", AttrDict({"agg": agg, "search": search}) + ) + super().__init__(search, data) diff --git a/elasticsearch/dsl/response/hit.py b/elasticsearch/dsl/response/hit.py new file mode 100644 index 000000000..a09d36e9c --- /dev/null +++ b/elasticsearch/dsl/response/hit.py @@ -0,0 +1,53 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+from typing import Any, Dict, List, Tuple, cast
+
+from ..utils import AttrDict, HitMeta
+
+
+class Hit(AttrDict[Any]):
+    def __init__(self, document: Dict[str, Any]):
+        data: Dict[str, Any] = {}
+        if "_source" in document:
+            data = cast(Dict[str, Any], document["_source"])
+        if "fields" in document:
+            data.update(cast(Dict[str, Any], document["fields"]))
+
+        super().__init__(data)
+        # assign meta as attribute and not as key in self._d_
+        super(AttrDict, self).__setattr__("meta", HitMeta(document))
+
+    def __getstate__(self) -> Tuple[Dict[str, Any], HitMeta]:  # type: ignore[override]
+        # add self.meta since it is not in self.__dict__
+        return super().__getstate__() + (self.meta,)
+
+    def __setstate__(self, state: Tuple[Dict[str, Any], HitMeta]) -> None:  # type: ignore[override]
+        super(AttrDict, self).__setattr__("meta", state[-1])
+        super().__setstate__(state[:-1])
+
+    def __dir__(self) -> List[str]:
+        # be sure to expose meta in dir(self)
+        return super().__dir__() + ["meta"]
+
+    def __repr__(self) -> str:
+        return "<Hit({}): {}>".format(
+            "/".join(
+                getattr(self.meta, key) for key in ("index", "id") if key in self.meta
+            ),
+            super().__repr__(),
+        )
diff --git a/elasticsearch/dsl/search.py b/elasticsearch/dsl/search.py
new file mode 100644
index 000000000..eea200e00
--- /dev/null
+++ b/elasticsearch/dsl/search.py
@@ -0,0 +1,28 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from ._async.search import (  # noqa: F401
+    AsyncEmptySearch,
+    AsyncMultiSearch,
+    AsyncSearch,
+)
+from ._sync.search import (  # noqa: F401
+    EmptySearch,
+    MultiSearch,
+    Search,
+)
+from .search_base import Q  # noqa: F401
diff --git a/elasticsearch/dsl/search_base.py b/elasticsearch/dsl/search_base.py
new file mode 100644
index 000000000..ad4a56059
--- /dev/null
+++ b/elasticsearch/dsl/search_base.py
@@ -0,0 +1,1040 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +import collections.abc +import copy +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Generic, + Iterator, + List, + Optional, + Protocol, + Tuple, + Type, + Union, + cast, + overload, +) + +from typing_extensions import Self, TypeVar + +from .aggs import A, Agg, AggBase +from .document_base import InstrumentedField +from .exceptions import IllegalOperation +from .query import Bool, Q, Query +from .response import Hit, Response +from .utils import _R, AnyUsingType, AttrDict, DslBase, recursive_to_dict + +if TYPE_CHECKING: + from .field import Field, Object + + +class SupportsClone(Protocol): + def _clone(self) -> Self: ... + + +_S = TypeVar("_S", bound=SupportsClone) + + +class QueryProxy(Generic[_S]): + """ + Simple proxy around DSL objects (queries) that can be called + (to add query/post_filter) and also allows attribute access which is proxied to + the wrapped query. + """ + + def __init__(self, search: _S, attr_name: str): + self._search = search + self._proxied: Optional[Query] = None + self._attr_name = attr_name + + def __nonzero__(self) -> bool: + return self._proxied is not None + + __bool__ = __nonzero__ + + def __call__(self, *args: Any, **kwargs: Any) -> _S: + s = self._search._clone() + + # we cannot use self._proxied since we just cloned self._search and + # need to access the new self on the clone + proxied = getattr(s, self._attr_name) + if proxied._proxied is None: + proxied._proxied = Q(*args, **kwargs) + else: + proxied._proxied &= Q(*args, **kwargs) + + # always return search to be chainable + return s + + def __getattr__(self, attr_name: str) -> Any: + return getattr(self._proxied, attr_name) + + def __setattr__(self, attr_name: str, value: Any) -> None: + if not attr_name.startswith("_"): + if self._proxied is not None: + self._proxied = Q(self._proxied.to_dict()) + setattr(self._proxied, attr_name, value) + super().__setattr__(attr_name, value) + + def __getstate__(self) -> Tuple[_S, Optional[Query], str]: + return self._search, self._proxied, self._attr_name + + def __setstate__(self, state: Tuple[_S, Optional[Query], str]) -> None: + self._search, self._proxied, self._attr_name = state + + +class ProxyDescriptor(Generic[_S]): + """ + Simple descriptor to enable setting of queries and filters as: + + s = Search() + s.query = Q(...) 
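+        s.post_filter = Q(...)  # the post_filter proxy behaves the same way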
+ + """ + + def __init__(self, name: str): + self._attr_name = f"_{name}_proxy" + + def __get__(self, instance: Any, owner: object) -> QueryProxy[_S]: + return cast(QueryProxy[_S], getattr(instance, self._attr_name)) + + def __set__(self, instance: _S, value: Dict[str, Any]) -> None: + proxy: QueryProxy[_S] = getattr(instance, self._attr_name) + proxy._proxied = Q(value) + + +class AggsProxy(AggBase[_R], DslBase): + name = "aggs" + + def __init__(self, search: "SearchBase[_R]"): + self._base = cast("Agg[_R]", self) + self._search = search + self._params = {"aggs": {}} + + def to_dict(self) -> Dict[str, Any]: + return cast(Dict[str, Any], super().to_dict().get("aggs", {})) + + +class Request(Generic[_R]): + def __init__( + self, + using: AnyUsingType = "default", + index: Optional[Union[str, List[str]]] = None, + doc_type: Optional[ + Union[type, str, List[Union[type, str]], Dict[str, Union[type, str]]] + ] = None, + extra: Optional[Dict[str, Any]] = None, + ): + self._using = using + + self._index = None + if isinstance(index, (tuple, list)): + self._index = list(index) + elif index: + self._index = [index] + + self._doc_type: List[Union[type, str]] = [] + self._doc_type_map: Dict[str, Any] = {} + if isinstance(doc_type, (tuple, list)): + self._doc_type.extend(doc_type) + elif isinstance(doc_type, collections.abc.Mapping): + self._doc_type.extend(doc_type.keys()) + self._doc_type_map.update(doc_type) + elif doc_type: + self._doc_type.append(doc_type) + + self._params: Dict[str, Any] = {} + self._extra: Dict[str, Any] = extra or {} + + def __eq__(self, other: Any) -> bool: + return ( + isinstance(other, Request) + and other._params == self._params + and other._index == self._index + and other._doc_type == self._doc_type + and other.to_dict() == self.to_dict() + ) + + def __copy__(self) -> Self: + return self._clone() + + def params(self, **kwargs: Any) -> Self: + """ + Specify query params to be used when executing the search. All the + keyword arguments will override the current values. See + https://elasticsearch-py.readthedocs.io/en/latest/api/elasticsearch.html#elasticsearch.Elasticsearch.search + for all available parameters. + + Example:: + + s = Search() + s = s.params(routing='user-1', preference='local') + """ + s = self._clone() + s._params.update(kwargs) + return s + + def index(self, *index: Union[str, List[str], Tuple[str, ...]]) -> Self: + """ + Set the index for the search. If called empty it will remove all information. 
+ + Example:: + + s = Search() + s = s.index('twitter-2015.01.01', 'twitter-2015.01.02') + s = s.index(['twitter-2015.01.01', 'twitter-2015.01.02']) + """ + # .index() resets + s = self._clone() + if not index: + s._index = None + else: + indexes = [] + for i in index: + if isinstance(i, str): + indexes.append(i) + elif isinstance(i, list): + indexes += i + elif isinstance(i, tuple): + indexes += list(i) + + s._index = (self._index or []) + indexes + + return s + + def _resolve_field(self, path: str) -> Optional["Field"]: + for dt in self._doc_type: + if not hasattr(dt, "_index"): + continue + field = dt._index.resolve_field(path) + if field is not None: + return cast("Field", field) + return None + + def _resolve_nested( + self, hit: AttrDict[Any], parent_class: Optional[type] = None + ) -> Type[_R]: + doc_class = Hit + + nested_path = [] + nesting = hit["_nested"] + while nesting and "field" in nesting: + nested_path.append(nesting["field"]) + nesting = nesting.get("_nested") + nested_path_str = ".".join(nested_path) + + nested_field: Optional["Object"] + if parent_class is not None and hasattr(parent_class, "_index"): + nested_field = cast( + Optional["Object"], parent_class._index.resolve_field(nested_path_str) + ) + else: + nested_field = cast( + Optional["Object"], self._resolve_field(nested_path_str) + ) + + if nested_field is not None: + return cast(Type[_R], nested_field._doc_class) + + return cast(Type[_R], doc_class) + + def _get_result( + self, hit: AttrDict[Any], parent_class: Optional[type] = None + ) -> _R: + doc_class: Any = Hit + dt = hit.get("_type") + + if "_nested" in hit: + doc_class = self._resolve_nested(hit, parent_class) + + elif dt in self._doc_type_map: + doc_class = self._doc_type_map[dt] + + else: + for doc_type in self._doc_type: + if hasattr(doc_type, "_matches") and doc_type._matches(hit): + doc_class = doc_type + break + + for t in hit.get("inner_hits", ()): + hit["inner_hits"][t] = Response[_R]( + self, hit["inner_hits"][t], doc_class=doc_class + ) + + callback = getattr(doc_class, "from_es", doc_class) + return cast(_R, callback(hit)) + + def doc_type( + self, *doc_type: Union[type, str], **kwargs: Callable[[AttrDict[Any]], Any] + ) -> Self: + """ + Set the type to search through. You can supply a single value or + multiple. Values can be strings or subclasses of ``Document``. + + You can also pass in any keyword arguments, mapping a doc_type to a + callback that should be used instead of the Hit class. + + If no doc_type is supplied any information stored on the instance will + be erased. + + Example: + + s = Search().doc_type('product', 'store', User, custom=my_callback) + """ + # .doc_type() resets + s = self._clone() + if not doc_type and not kwargs: + s._doc_type = [] + s._doc_type_map = {} + else: + s._doc_type.extend(doc_type) + s._doc_type.extend(kwargs.keys()) + s._doc_type_map.update(kwargs) + return s + + def using(self, client: AnyUsingType) -> Self: + """ + Associate the search request with an elasticsearch client. A fresh copy + will be returned with current instance remaining unchanged. + + :arg client: an instance of ``elasticsearch.Elasticsearch`` to use or + an alias to look up in ``elasticsearch.dsl.connections`` + + """ + s = self._clone() + s._using = client + return s + + def extra(self, **kwargs: Any) -> Self: + """ + Add extra keys to the request body. Mostly here for backwards + compatibility. 
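+
+        ``from_`` is translated to ``from`` in the request body, since ``from``
+        is a reserved word in Python. Example (values are illustrative)::
+
+            s = Search()
+            s = s.extra(explain=True)
+            s = s.extra(from_=10, size=20)  # sent as {"from": 10, "size": 20}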
+ """ + s = self._clone() + if "from_" in kwargs: + kwargs["from"] = kwargs.pop("from_") + s._extra.update(kwargs) + return s + + def _clone(self) -> Self: + s = self.__class__( + using=self._using, index=self._index, doc_type=self._doc_type + ) + s._doc_type_map = self._doc_type_map.copy() + s._extra = self._extra.copy() + s._params = self._params.copy() + return s + + if TYPE_CHECKING: + + def to_dict(self) -> Dict[str, Any]: ... + + +class SearchBase(Request[_R]): + query = ProxyDescriptor[Self]("query") + post_filter = ProxyDescriptor[Self]("post_filter") + _response: Response[_R] + + def __init__(self, **kwargs: Any): + """ + Search request to elasticsearch. + + :arg using: `Elasticsearch` instance to use + :arg index: limit the search to index + :arg doc_type: only query this type. + + All the parameters supplied (or omitted) at creation type can be later + overridden by methods (`using`, `index` and `doc_type` respectively). + """ + super().__init__(**kwargs) + + self.aggs = AggsProxy[_R](self) + self._sort: List[Union[str, Dict[str, Dict[str, str]]]] = [] + self._knn: List[Dict[str, Any]] = [] + self._rank: Dict[str, Any] = {} + self._collapse: Dict[str, Any] = {} + self._source: Optional[Union[bool, List[str], Dict[str, List[str]]]] = None + self._highlight: Dict[str, Any] = {} + self._highlight_opts: Dict[str, Any] = {} + self._suggest: Dict[str, Any] = {} + self._script_fields: Dict[str, Any] = {} + self._response_class = Response[_R] + + self._query_proxy = QueryProxy(self, "query") + self._post_filter_proxy = QueryProxy(self, "post_filter") + + def filter(self, *args: Any, **kwargs: Any) -> Self: + return self.query(Bool(filter=[Q(*args, **kwargs)])) + + def exclude(self, *args: Any, **kwargs: Any) -> Self: + return self.query(Bool(filter=[~Q(*args, **kwargs)])) + + def __getitem__(self, n: Union[int, slice]) -> Self: + """ + Support slicing the `Search` instance for pagination. + + Slicing equates to the from/size parameters. E.g.:: + + s = Search().query(...)[0:25] + + is equivalent to:: + + s = Search().query(...).extra(from_=0, size=25) + + """ + s = self._clone() + + if isinstance(n, slice): + # If negative slicing, abort. + if n.start and n.start < 0 or n.stop and n.stop < 0: + raise ValueError("Search does not support negative slicing.") + slice_start = n.start + slice_stop = n.stop + else: # This is an index lookup, equivalent to slicing by [n:n+1]. + # If negative index, abort. + if n < 0: + raise ValueError("Search does not support negative indexing.") + slice_start = n + slice_stop = n + 1 + + old_from = s._extra.get("from") + old_to = None + if "size" in s._extra: + old_to = (old_from or 0) + s._extra["size"] + + new_from = old_from + if slice_start is not None: + new_from = (old_from or 0) + slice_start + new_to = old_to + if slice_stop is not None: + new_to = (old_from or 0) + slice_stop + if old_to is not None and old_to < new_to: + new_to = old_to + + if new_from is not None: + s._extra["from"] = new_from + if new_to is not None: + s._extra["size"] = max(0, new_to - (new_from or 0)) + return s + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> Self: + """ + Construct a new `Search` instance from a raw dict containing the search + body. Useful when migrating from raw dictionaries. + + Example:: + + s = Search.from_dict({ + "query": { + "bool": { + "must": [...] 
+ } + }, + "aggs": {...} + }) + s = s.filter('term', published=True) + """ + s = cls() + s.update_from_dict(d) + return s + + def _clone(self) -> Self: + """ + Return a clone of the current search request. Performs a shallow copy + of all the underlying objects. Used internally by most state modifying + APIs. + """ + s = super()._clone() + + s._response_class = self._response_class + s._knn = [knn.copy() for knn in self._knn] + s._rank = self._rank.copy() + s._collapse = self._collapse.copy() + s._sort = self._sort[:] + s._source = copy.copy(self._source) if self._source is not None else None + s._highlight = self._highlight.copy() + s._highlight_opts = self._highlight_opts.copy() + s._suggest = self._suggest.copy() + s._script_fields = self._script_fields.copy() + for x in ("query", "post_filter"): + getattr(s, x)._proxied = getattr(self, x)._proxied + + # copy top-level bucket definitions + if self.aggs._params.get("aggs"): + s.aggs._params = {"aggs": self.aggs._params["aggs"].copy()} + return s + + def response_class(self, cls: Type[Response[_R]]) -> Self: + """ + Override the default wrapper used for the response. + """ + s = self._clone() + s._response_class = cls + return s + + def update_from_dict(self, d: Dict[str, Any]) -> Self: + """ + Apply options from a serialized body to the current instance. Modifies + the object in-place. Used mostly by ``from_dict``. + """ + d = d.copy() + if "query" in d: + self.query._proxied = Q(d.pop("query")) + if "post_filter" in d: + self.post_filter._proxied = Q(d.pop("post_filter")) + + aggs = d.pop("aggs", d.pop("aggregations", {})) + if aggs: + self.aggs._params = { + "aggs": {name: A(value) for (name, value) in aggs.items()} + } + if "knn" in d: + self._knn = d.pop("knn") + if isinstance(self._knn, dict): + self._knn = [self._knn] + if "rank" in d: + self._rank = d.pop("rank") + if "collapse" in d: + self._collapse = d.pop("collapse") + if "sort" in d: + self._sort = d.pop("sort") + if "_source" in d: + self._source = d.pop("_source") + if "highlight" in d: + high = d.pop("highlight").copy() + self._highlight = high.pop("fields") + self._highlight_opts = high + if "suggest" in d: + self._suggest = d.pop("suggest") + if "text" in self._suggest: + text = self._suggest.pop("text") + for s in self._suggest.values(): + s.setdefault("text", text) + if "script_fields" in d: + self._script_fields = d.pop("script_fields") + self._extra.update(d) + return self + + def script_fields(self, **kwargs: Any) -> Self: + """ + Define script fields to be calculated on hits. See + https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-script-fields.html + for more details. + + Example:: + + s = Search() + s = s.script_fields(times_two="doc['field'].value * 2") + s = s.script_fields( + times_three={ + 'script': { + 'lang': 'painless', + 'source': "doc['field'].value * params.n", + 'params': {'n': 3} + } + } + ) + + """ + s = self._clone() + for name in kwargs: + if isinstance(kwargs[name], str): + kwargs[name] = {"script": kwargs[name]} + s._script_fields.update(kwargs) + return s + + def knn( + self, + field: Union[str, "InstrumentedField"], + k: int, + num_candidates: int, + query_vector: Optional[List[float]] = None, + query_vector_builder: Optional[Dict[str, Any]] = None, + boost: Optional[float] = None, + filter: Optional[Query] = None, + similarity: Optional[float] = None, + inner_hits: Optional[Dict[str, Any]] = None, + ) -> Self: + """ + Add a k-nearest neighbor (kNN) search. 
+ + :arg field: the vector field to search against as a string or document class attribute + :arg k: number of nearest neighbors to return as top hits + :arg num_candidates: number of nearest neighbor candidates to consider per shard + :arg query_vector: the vector to search for + :arg query_vector_builder: A dictionary indicating how to build a query vector + :arg boost: A floating-point boost factor for kNN scores + :arg filter: query to filter the documents that can match + :arg similarity: the minimum similarity required for a document to be considered a match, as a float value + :arg inner_hits: retrieve hits from nested field + + Example:: + + s = Search() + s = s.knn(field='embedding', k=5, num_candidates=10, query_vector=vector, + filter=Q('term', category='blog'))) + """ + s = self._clone() + s._knn.append( + { + "field": str(field), # str() is for InstrumentedField instances + "k": k, + "num_candidates": num_candidates, + } + ) + if query_vector is None and query_vector_builder is None: + raise ValueError("one of query_vector and query_vector_builder is required") + if query_vector is not None and query_vector_builder is not None: + raise ValueError( + "only one of query_vector and query_vector_builder must be given" + ) + if query_vector is not None: + s._knn[-1]["query_vector"] = cast(Any, query_vector) + if query_vector_builder is not None: + s._knn[-1]["query_vector_builder"] = query_vector_builder + if boost is not None: + s._knn[-1]["boost"] = boost + if filter is not None: + if isinstance(filter, Query): + s._knn[-1]["filter"] = filter.to_dict() + else: + s._knn[-1]["filter"] = filter + if similarity is not None: + s._knn[-1]["similarity"] = similarity + if inner_hits is not None: + s._knn[-1]["inner_hits"] = inner_hits + return s + + def rank(self, rrf: Optional[Union[bool, Dict[str, Any]]] = None) -> Self: + """ + Defines a method for combining and ranking results sets from a combination + of searches. Requires a minimum of 2 results sets. + + :arg rrf: Set to ``True`` or an options dictionary to set the rank method to reciprocal rank fusion (RRF). + + Example:: + + s = Search() + s = s.query('match', content='search text') + s = s.knn(field='embedding', k=5, num_candidates=10, query_vector=vector) + s = s.rank(rrf=True) + + Note: This option is in technical preview and may change in the future. The syntax will likely change before GA. + """ + s = self._clone() + s._rank = {} + if rrf is not None and rrf is not False: + s._rank["rrf"] = {} if rrf is True else rrf + return s + + def source( + self, + fields: Optional[ + Union[ + bool, + str, + "InstrumentedField", + List[Union[str, "InstrumentedField"]], + Dict[str, List[Union[str, "InstrumentedField"]]], + ] + ] = None, + **kwargs: Any, + ) -> Self: + """ + Selectively control how the _source field is returned. + + :arg fields: field name, wildcard string, list of field names or wildcards, + or dictionary of includes and excludes + :arg kwargs: ``includes`` or ``excludes`` arguments, when ``fields`` is ``None``. + + When no arguments are given, the entire document will be returned for + each hit. If ``fields`` is a string or list of strings, the field names or field + wildcards given will be included. If ``fields`` is a dictionary with keys of + 'includes' and/or 'excludes' the fields will be either included or excluded + appropriately. + + Calling this multiple times with the same named parameter will override the + previous values with the new ones. 
+ + Example:: + + s = Search() + s = s.source(includes=['obj1.*'], excludes=["*.description"]) + + s = Search() + s = s.source(includes=['obj1.*']).source(excludes=["*.description"]) + + """ + s = self._clone() + + if fields and kwargs: + raise ValueError("You cannot specify fields and kwargs at the same time.") + + @overload + def ensure_strings(fields: str) -> str: ... + + @overload + def ensure_strings(fields: "InstrumentedField") -> str: ... + + @overload + def ensure_strings( + fields: List[Union[str, "InstrumentedField"]] + ) -> List[str]: ... + + @overload + def ensure_strings( + fields: Dict[str, List[Union[str, "InstrumentedField"]]] + ) -> Dict[str, List[str]]: ... + + def ensure_strings( + fields: Union[ + str, + "InstrumentedField", + List[Union[str, "InstrumentedField"]], + Dict[str, List[Union[str, "InstrumentedField"]]], + ] + ) -> Union[str, List[str], Dict[str, List[str]]]: + if isinstance(fields, dict): + return {k: ensure_strings(v) for k, v in fields.items()} + elif not isinstance(fields, (str, InstrumentedField)): + # we assume that if `fields` is not a any of [dict, str, + # InstrumentedField] then it is an iterable of strings or + # InstrumentedFields, so we convert them to a plain list of + # strings + return [str(f) for f in fields] + else: + return str(fields) + + if fields is not None: + s._source = fields if isinstance(fields, bool) else ensure_strings(fields) # type: ignore[assignment] + return s + + if kwargs and not isinstance(s._source, dict): + s._source = {} + + if isinstance(s._source, dict): + for key, value in kwargs.items(): + if value is None: + try: + del s._source[key] + except KeyError: + pass + else: + s._source[key] = ensure_strings(value) + + return s + + def sort( + self, *keys: Union[str, "InstrumentedField", Dict[str, Dict[str, str]]] + ) -> Self: + """ + Add sorting information to the search request. If called without + arguments it will remove all sort requirements. Otherwise it will + replace them. Acceptable arguments are:: + + 'some.field' + '-some.other.field' + {'different.field': {'any': 'dict'}} + + so for example:: + + s = Search().sort( + 'category', + '-title', + {"price" : {"order" : "asc", "mode" : "avg"}} + ) + + will sort by ``category``, ``title`` (in descending order) and + ``price`` in ascending order using the ``avg`` mode. + + The API returns a copy of the Search object and can thus be chained. + """ + s = self._clone() + s._sort = [] + for k in keys: + if not isinstance(k, dict): + sort_field = str(k) + if sort_field.startswith("-"): + if sort_field[1:] == "_score": + raise IllegalOperation("Sorting by `-_score` is not allowed.") + s._sort.append({sort_field[1:]: {"order": "desc"}}) + else: + s._sort.append(sort_field) + else: + s._sort.append(k) + return s + + def collapse( + self, + field: Optional[Union[str, "InstrumentedField"]] = None, + inner_hits: Optional[Dict[str, Any]] = None, + max_concurrent_group_searches: Optional[int] = None, + ) -> Self: + """ + Add collapsing information to the search request. + If called without providing ``field``, it will remove all collapse + requirements, otherwise it will replace them with the provided + arguments. + The API returns a copy of the Search object and can thus be chained. 
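+
+        Example (the field and ``inner_hits`` options are illustrative)::
+
+            s = Search()
+            s = s.collapse('user.id', inner_hits={'name': 'most_recent', 'size': 5})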
+ """ + s = self._clone() + s._collapse = {} + + if field is None: + return s + + s._collapse["field"] = str(field) + if inner_hits: + s._collapse["inner_hits"] = inner_hits + if max_concurrent_group_searches: + s._collapse["max_concurrent_group_searches"] = max_concurrent_group_searches + return s + + def highlight_options(self, **kwargs: Any) -> Self: + """ + Update the global highlighting options used for this request. For + example:: + + s = Search() + s = s.highlight_options(order='score') + """ + s = self._clone() + s._highlight_opts.update(kwargs) + return s + + def highlight( + self, *fields: Union[str, "InstrumentedField"], **kwargs: Any + ) -> Self: + """ + Request highlighting of some fields. All keyword arguments passed in will be + used as parameters for all the fields in the ``fields`` parameter. Example:: + + Search().highlight('title', 'body', fragment_size=50) + + will produce the equivalent of:: + + { + "highlight": { + "fields": { + "body": {"fragment_size": 50}, + "title": {"fragment_size": 50} + } + } + } + + If you want to have different options for different fields + you can call ``highlight`` twice:: + + Search().highlight('title', fragment_size=50).highlight('body', fragment_size=100) + + which will produce:: + + { + "highlight": { + "fields": { + "body": {"fragment_size": 100}, + "title": {"fragment_size": 50} + } + } + } + + """ + s = self._clone() + for f in fields: + s._highlight[str(f)] = kwargs + return s + + def suggest( + self, + name: str, + text: Optional[str] = None, + regex: Optional[str] = None, + **kwargs: Any, + ) -> Self: + """ + Add a suggestions request to the search. + + :arg name: name of the suggestion + :arg text: text to suggest on + + All keyword arguments will be added to the suggestions body. For example:: + + s = Search() + s = s.suggest('suggestion-1', 'Elasticsearch', term={'field': 'body'}) + + # regex query for Completion Suggester + s = Search() + s = s.suggest('suggestion-1', regex='py[thon|py]', completion={'field': 'body'}) + """ + if text is None and regex is None: + raise ValueError('You have to pass "text" or "regex" argument.') + if text and regex: + raise ValueError('You can only pass either "text" or "regex" argument.') + if regex and "completion" not in kwargs: + raise ValueError( + '"regex" argument must be passed with "completion" keyword argument.' + ) + + s = self._clone() + if regex: + s._suggest[name] = {"regex": regex} + elif text: + if "completion" in kwargs: + s._suggest[name] = {"prefix": text} + else: + s._suggest[name] = {"text": text} + s._suggest[name].update(kwargs) + return s + + def search_after(self) -> Self: + """ + Return a ``Search`` instance that retrieves the next page of results. + + This method provides an easy way to paginate a long list of results using + the ``search_after`` option. For example:: + + page_size = 20 + s = Search()[:page_size].sort("date") + + while True: + # get a page of results + r = await s.execute() + + # do something with this page of results + + # exit the loop if we reached the end + if len(r.hits) < page_size: + break + + # get a search object with the next page of results + s = s.search_after() + + Note that the ``search_after`` option requires the search to have an + explicit ``sort`` order. 
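+
+        Internally this uses the ``sort`` values of the last hit of the previous
+        response as the ``search_after`` body option, roughly equivalent to::
+
+            s = s.extra(search_after=r.hits[-1].meta.sort)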
+ """ + if not hasattr(self, "_response"): + raise ValueError("A search must be executed before using search_after") + return cast(Self, self._response.search_after()) + + def to_dict(self, count: bool = False, **kwargs: Any) -> Dict[str, Any]: + """ + Serialize the search into the dictionary that will be sent over as the + request's body. + + :arg count: a flag to specify if we are interested in a body for count - + no aggregations, no pagination bounds etc. + + All additional keyword arguments will be included into the dictionary. + """ + d = {} + + if self.query: + d["query"] = recursive_to_dict(self.query) + + if self._knn: + if len(self._knn) == 1: + d["knn"] = self._knn[0] + else: + d["knn"] = self._knn + + if self._rank: + d["rank"] = self._rank + + # count request doesn't care for sorting and other things + if not count: + if self.post_filter: + d["post_filter"] = recursive_to_dict(self.post_filter.to_dict()) + + if self.aggs.aggs: + d.update(recursive_to_dict(self.aggs.to_dict())) + + if self._sort: + d["sort"] = self._sort + + if self._collapse: + d["collapse"] = self._collapse + + d.update(recursive_to_dict(self._extra)) + + if self._source not in (None, {}): + d["_source"] = self._source + + if self._highlight: + d["highlight"] = {"fields": self._highlight} + d["highlight"].update(self._highlight_opts) + + if self._suggest: + d["suggest"] = self._suggest + + if self._script_fields: + d["script_fields"] = self._script_fields + + d.update(recursive_to_dict(kwargs)) + return d + + +class MultiSearchBase(Request[_R]): + """ + Combine multiple :class:`~elasticsearch.dsl.Search` objects into a single + request. + """ + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + self._searches: List[SearchBase[_R]] = [] + + def __getitem__(self, key: Union[int, slice]) -> Any: + return self._searches[key] + + def __iter__(self) -> Iterator[SearchBase[_R]]: + return iter(self._searches) + + def _clone(self) -> Self: + ms = super()._clone() + ms._searches = self._searches[:] + return ms + + def add(self, search: SearchBase[_R]) -> Self: + """ + Adds a new :class:`~elasticsearch.dsl.Search` object to the request:: + + ms = MultiSearch(index='my-index') + ms = ms.add(Search(doc_type=Category).filter('term', category='python')) + ms = ms.add(Search(doc_type=Blog)) + """ + ms = self._clone() + ms._searches.append(search) + return ms + + def to_dict(self) -> List[Dict[str, Any]]: # type: ignore[override] + out: List[Dict[str, Any]] = [] + for s in self._searches: + meta: Dict[str, Any] = {} + if s._index: + meta["index"] = cast(Any, s._index) + meta.update(s._params) + + out.append(meta) + out.append(s.to_dict()) + + return out diff --git a/elasticsearch/dsl/serializer.py b/elasticsearch/dsl/serializer.py new file mode 100644 index 000000000..3080f1dad --- /dev/null +++ b/elasticsearch/dsl/serializer.py @@ -0,0 +1,34 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any + +from elasticsearch.serializer import JSONSerializer + +from .utils import AttrList + + +class AttrJSONSerializer(JSONSerializer): + def default(self, data: Any) -> Any: + if isinstance(data, AttrList): + return data._l_ + if hasattr(data, "to_dict"): + return data.to_dict() + return super().default(data) + + +serializer = AttrJSONSerializer() diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py new file mode 100644 index 000000000..756f6ef2d --- /dev/null +++ b/elasticsearch/dsl/types.py @@ -0,0 +1,6272 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Dict, Literal, Mapping, Sequence, Union + +from elastic_transport.client_utils import DEFAULT, DefaultType + +from . import Query +from .document_base import InstrumentedField +from .utils import AttrDict + +PipeSeparatedFlags = str + + +class AggregationRange(AttrDict[Any]): + """ + :arg from: Start of the range (inclusive). + :arg key: Custom key to return the range with. + :arg to: End of the range (exclusive). + """ + + from_: Union[float, None, DefaultType] + key: Union[str, DefaultType] + to: Union[float, None, DefaultType] + + def __init__( + self, + *, + from_: Union[float, None, DefaultType] = DEFAULT, + key: Union[str, DefaultType] = DEFAULT, + to: Union[float, None, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if from_ is not DEFAULT: + kwargs["from_"] = from_ + if key is not DEFAULT: + kwargs["key"] = key + if to is not DEFAULT: + kwargs["to"] = to + super().__init__(kwargs) + + +class BucketCorrelationFunction(AttrDict[Any]): + """ + :arg count_correlation: (required) The configuration to calculate a + count correlation. This function is designed for determining the + correlation of a term value and a given metric. + """ + + count_correlation: Union[ + "BucketCorrelationFunctionCountCorrelation", Dict[str, Any], DefaultType + ] + + def __init__( + self, + *, + count_correlation: Union[ + "BucketCorrelationFunctionCountCorrelation", Dict[str, Any], DefaultType + ] = DEFAULT, + **kwargs: Any, + ): + if count_correlation is not DEFAULT: + kwargs["count_correlation"] = count_correlation + super().__init__(kwargs) + + +class BucketCorrelationFunctionCountCorrelation(AttrDict[Any]): + """ + :arg indicator: (required) The indicator with which to correlate the + configured `bucket_path` values. 
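+
+    Example (the indicator values are illustrative)::
+
+        BucketCorrelationFunctionCountCorrelation(
+            indicator={"doc_count": 200, "expectations": [1.0, 2.0, 3.0]}
+        )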
+ """ + + indicator: Union[ + "BucketCorrelationFunctionCountCorrelationIndicator", + Dict[str, Any], + DefaultType, + ] + + def __init__( + self, + *, + indicator: Union[ + "BucketCorrelationFunctionCountCorrelationIndicator", + Dict[str, Any], + DefaultType, + ] = DEFAULT, + **kwargs: Any, + ): + if indicator is not DEFAULT: + kwargs["indicator"] = indicator + super().__init__(kwargs) + + +class BucketCorrelationFunctionCountCorrelationIndicator(AttrDict[Any]): + """ + :arg doc_count: (required) The total number of documents that + initially created the expectations. It’s required to be greater + than or equal to the sum of all values in the buckets_path as this + is the originating superset of data to which the term values are + correlated. + :arg expectations: (required) An array of numbers with which to + correlate the configured `bucket_path` values. The length of this + value must always equal the number of buckets returned by the + `bucket_path`. + :arg fractions: An array of fractions to use when averaging and + calculating variance. This should be used if the pre-calculated + data and the buckets_path have known gaps. The length of + fractions, if provided, must equal expectations. + """ + + doc_count: Union[int, DefaultType] + expectations: Union[Sequence[float], DefaultType] + fractions: Union[Sequence[float], DefaultType] + + def __init__( + self, + *, + doc_count: Union[int, DefaultType] = DEFAULT, + expectations: Union[Sequence[float], DefaultType] = DEFAULT, + fractions: Union[Sequence[float], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if doc_count is not DEFAULT: + kwargs["doc_count"] = doc_count + if expectations is not DEFAULT: + kwargs["expectations"] = expectations + if fractions is not DEFAULT: + kwargs["fractions"] = fractions + super().__init__(kwargs) + + +class ChiSquareHeuristic(AttrDict[Any]): + """ + :arg background_is_superset: (required) Set to `false` if you defined + a custom background filter that represents a different set of + documents that you want to compare to. + :arg include_negatives: (required) Set to `false` to filter out the + terms that appear less often in the subset than in documents + outside the subset. + """ + + background_is_superset: Union[bool, DefaultType] + include_negatives: Union[bool, DefaultType] + + def __init__( + self, + *, + background_is_superset: Union[bool, DefaultType] = DEFAULT, + include_negatives: Union[bool, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if background_is_superset is not DEFAULT: + kwargs["background_is_superset"] = background_is_superset + if include_negatives is not DEFAULT: + kwargs["include_negatives"] = include_negatives + super().__init__(kwargs) + + +class ClassificationInferenceOptions(AttrDict[Any]): + """ + :arg num_top_classes: Specifies the number of top class predictions to + return. Defaults to 0. + :arg num_top_feature_importance_values: Specifies the maximum number + of feature importance values per document. + :arg prediction_field_type: Specifies the type of the predicted field + to write. Acceptable values are: string, number, boolean. When + boolean is provided 1.0 is transformed to true and 0.0 to false. + :arg results_field: The field that is added to incoming documents to + contain the inference prediction. Defaults to predicted_value. + :arg top_classes_results_field: Specifies the field to which the top + classes are written. Defaults to top_classes. 
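+
+    Example (the field name is illustrative)::
+
+        ClassificationInferenceOptions(num_top_classes=2, results_field="prediction")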
+ """ + + num_top_classes: Union[int, DefaultType] + num_top_feature_importance_values: Union[int, DefaultType] + prediction_field_type: Union[str, DefaultType] + results_field: Union[str, DefaultType] + top_classes_results_field: Union[str, DefaultType] + + def __init__( + self, + *, + num_top_classes: Union[int, DefaultType] = DEFAULT, + num_top_feature_importance_values: Union[int, DefaultType] = DEFAULT, + prediction_field_type: Union[str, DefaultType] = DEFAULT, + results_field: Union[str, DefaultType] = DEFAULT, + top_classes_results_field: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if num_top_classes is not DEFAULT: + kwargs["num_top_classes"] = num_top_classes + if num_top_feature_importance_values is not DEFAULT: + kwargs["num_top_feature_importance_values"] = ( + num_top_feature_importance_values + ) + if prediction_field_type is not DEFAULT: + kwargs["prediction_field_type"] = prediction_field_type + if results_field is not DEFAULT: + kwargs["results_field"] = results_field + if top_classes_results_field is not DEFAULT: + kwargs["top_classes_results_field"] = top_classes_results_field + super().__init__(kwargs) + + +class CommonTermsQuery(AttrDict[Any]): + """ + :arg query: (required) + :arg analyzer: + :arg cutoff_frequency: + :arg high_freq_operator: + :arg low_freq_operator: + :arg minimum_should_match: + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + query: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + cutoff_frequency: Union[float, DefaultType] + high_freq_operator: Union[Literal["and", "or"], DefaultType] + low_freq_operator: Union[Literal["and", "or"], DefaultType] + minimum_should_match: Union[int, str, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + query: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + cutoff_frequency: Union[float, DefaultType] = DEFAULT, + high_freq_operator: Union[Literal["and", "or"], DefaultType] = DEFAULT, + low_freq_operator: Union[Literal["and", "or"], DefaultType] = DEFAULT, + minimum_should_match: Union[int, str, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if query is not DEFAULT: + kwargs["query"] = query + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if cutoff_frequency is not DEFAULT: + kwargs["cutoff_frequency"] = cutoff_frequency + if high_freq_operator is not DEFAULT: + kwargs["high_freq_operator"] = high_freq_operator + if low_freq_operator is not DEFAULT: + kwargs["low_freq_operator"] = low_freq_operator + if minimum_should_match is not DEFAULT: + kwargs["minimum_should_match"] = minimum_should_match + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class CoordsGeoBounds(AttrDict[Any]): + """ + :arg top: (required) + :arg bottom: (required) + :arg left: (required) + :arg right: (required) + """ + + top: Union[float, DefaultType] + bottom: Union[float, DefaultType] + left: Union[float, DefaultType] + right: Union[float, DefaultType] + + def __init__( + self, + *, + top: Union[float, DefaultType] = DEFAULT, + bottom: Union[float, 
DefaultType] = DEFAULT, + left: Union[float, DefaultType] = DEFAULT, + right: Union[float, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if top is not DEFAULT: + kwargs["top"] = top + if bottom is not DEFAULT: + kwargs["bottom"] = bottom + if left is not DEFAULT: + kwargs["left"] = left + if right is not DEFAULT: + kwargs["right"] = right + super().__init__(kwargs) + + +class CustomCategorizeTextAnalyzer(AttrDict[Any]): + """ + :arg char_filter: + :arg tokenizer: + :arg filter: + """ + + char_filter: Union[Sequence[str], DefaultType] + tokenizer: Union[str, DefaultType] + filter: Union[Sequence[str], DefaultType] + + def __init__( + self, + *, + char_filter: Union[Sequence[str], DefaultType] = DEFAULT, + tokenizer: Union[str, DefaultType] = DEFAULT, + filter: Union[Sequence[str], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if char_filter is not DEFAULT: + kwargs["char_filter"] = char_filter + if tokenizer is not DEFAULT: + kwargs["tokenizer"] = tokenizer + if filter is not DEFAULT: + kwargs["filter"] = filter + super().__init__(kwargs) + + +class DateRangeExpression(AttrDict[Any]): + """ + :arg from: Start of the range (inclusive). + :arg key: Custom key to return the range with. + :arg to: End of the range (exclusive). + """ + + from_: Union[str, float, DefaultType] + key: Union[str, DefaultType] + to: Union[str, float, DefaultType] + + def __init__( + self, + *, + from_: Union[str, float, DefaultType] = DEFAULT, + key: Union[str, DefaultType] = DEFAULT, + to: Union[str, float, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if from_ is not DEFAULT: + kwargs["from_"] = from_ + if key is not DEFAULT: + kwargs["key"] = key + if to is not DEFAULT: + kwargs["to"] = to + super().__init__(kwargs) + + +class EmptyObject(AttrDict[Any]): + """ + For empty Class assignments + """ + + def __init__(self, **kwargs: Any): + super().__init__(kwargs) + + +class EwmaModelSettings(AttrDict[Any]): + """ + :arg alpha: + """ + + alpha: Union[float, DefaultType] + + def __init__(self, *, alpha: Union[float, DefaultType] = DEFAULT, **kwargs: Any): + if alpha is not DEFAULT: + kwargs["alpha"] = alpha + super().__init__(kwargs) + + +class ExtendedBounds(AttrDict[Any]): + """ + :arg max: Maximum value for the bound. + :arg min: Minimum value for the bound. + """ + + max: Any + min: Any + + def __init__(self, *, max: Any = DEFAULT, min: Any = DEFAULT, **kwargs: Any): + if max is not DEFAULT: + kwargs["max"] = max + if min is not DEFAULT: + kwargs["min"] = min + super().__init__(kwargs) + + +class FieldAndFormat(AttrDict[Any]): + """ + A reference to a field with formatting instructions on how to return + the value + + :arg field: (required) Wildcard pattern. The request returns values + for field names matching this pattern. + :arg format: Format in which the values are returned. 
+ :arg include_unmapped: + """ + + field: Union[str, InstrumentedField, DefaultType] + format: Union[str, DefaultType] + include_unmapped: Union[bool, DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + format: Union[str, DefaultType] = DEFAULT, + include_unmapped: Union[bool, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + if format is not DEFAULT: + kwargs["format"] = format + if include_unmapped is not DEFAULT: + kwargs["include_unmapped"] = include_unmapped + super().__init__(kwargs) + + +class FieldCollapse(AttrDict[Any]): + """ + :arg field: (required) The field to collapse the result set on + :arg inner_hits: The number of inner hits and their sort order + :arg max_concurrent_group_searches: The number of concurrent requests + allowed to retrieve the inner_hits per group + :arg collapse: + """ + + field: Union[str, InstrumentedField, DefaultType] + inner_hits: Union[ + "InnerHits", Sequence["InnerHits"], Sequence[Dict[str, Any]], DefaultType + ] + max_concurrent_group_searches: Union[int, DefaultType] + collapse: Union["FieldCollapse", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + inner_hits: Union[ + "InnerHits", Sequence["InnerHits"], Sequence[Dict[str, Any]], DefaultType + ] = DEFAULT, + max_concurrent_group_searches: Union[int, DefaultType] = DEFAULT, + collapse: Union["FieldCollapse", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + if inner_hits is not DEFAULT: + kwargs["inner_hits"] = inner_hits + if max_concurrent_group_searches is not DEFAULT: + kwargs["max_concurrent_group_searches"] = max_concurrent_group_searches + if collapse is not DEFAULT: + kwargs["collapse"] = collapse + super().__init__(kwargs) + + +class FieldLookup(AttrDict[Any]): + """ + :arg id: (required) `id` of the document. + :arg index: Index from which to retrieve the document. + :arg path: Name of the field. + :arg routing: Custom routing value. 
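+
+    Example (the id, index and field names are illustrative)::
+
+        FieldLookup(id="1", index="my-index", path="location")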
+ """ + + id: Union[str, DefaultType] + index: Union[str, DefaultType] + path: Union[str, InstrumentedField, DefaultType] + routing: Union[str, DefaultType] + + def __init__( + self, + *, + id: Union[str, DefaultType] = DEFAULT, + index: Union[str, DefaultType] = DEFAULT, + path: Union[str, InstrumentedField, DefaultType] = DEFAULT, + routing: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if id is not DEFAULT: + kwargs["id"] = id + if index is not DEFAULT: + kwargs["index"] = index + if path is not DEFAULT: + kwargs["path"] = str(path) + if routing is not DEFAULT: + kwargs["routing"] = routing + super().__init__(kwargs) + + +class FieldSort(AttrDict[Any]): + """ + :arg missing: + :arg mode: + :arg nested: + :arg order: + :arg unmapped_type: + :arg numeric_type: + :arg format: + """ + + missing: Union[str, int, float, bool, DefaultType] + mode: Union[Literal["min", "max", "sum", "avg", "median"], DefaultType] + nested: Union["NestedSortValue", Dict[str, Any], DefaultType] + order: Union[Literal["asc", "desc"], DefaultType] + unmapped_type: Union[ + Literal[ + "none", + "geo_point", + "geo_shape", + "ip", + "binary", + "keyword", + "text", + "search_as_you_type", + "date", + "date_nanos", + "boolean", + "completion", + "nested", + "object", + "version", + "murmur3", + "token_count", + "percolator", + "integer", + "long", + "short", + "byte", + "float", + "half_float", + "scaled_float", + "double", + "integer_range", + "float_range", + "long_range", + "double_range", + "date_range", + "ip_range", + "alias", + "join", + "rank_feature", + "rank_features", + "flattened", + "shape", + "histogram", + "constant_keyword", + "aggregate_metric_double", + "dense_vector", + "semantic_text", + "sparse_vector", + "match_only_text", + "icu_collation_keyword", + ], + DefaultType, + ] + numeric_type: Union[Literal["long", "double", "date", "date_nanos"], DefaultType] + format: Union[str, DefaultType] + + def __init__( + self, + *, + missing: Union[str, int, float, bool, DefaultType] = DEFAULT, + mode: Union[ + Literal["min", "max", "sum", "avg", "median"], DefaultType + ] = DEFAULT, + nested: Union["NestedSortValue", Dict[str, Any], DefaultType] = DEFAULT, + order: Union[Literal["asc", "desc"], DefaultType] = DEFAULT, + unmapped_type: Union[ + Literal[ + "none", + "geo_point", + "geo_shape", + "ip", + "binary", + "keyword", + "text", + "search_as_you_type", + "date", + "date_nanos", + "boolean", + "completion", + "nested", + "object", + "version", + "murmur3", + "token_count", + "percolator", + "integer", + "long", + "short", + "byte", + "float", + "half_float", + "scaled_float", + "double", + "integer_range", + "float_range", + "long_range", + "double_range", + "date_range", + "ip_range", + "alias", + "join", + "rank_feature", + "rank_features", + "flattened", + "shape", + "histogram", + "constant_keyword", + "aggregate_metric_double", + "dense_vector", + "semantic_text", + "sparse_vector", + "match_only_text", + "icu_collation_keyword", + ], + DefaultType, + ] = DEFAULT, + numeric_type: Union[ + Literal["long", "double", "date", "date_nanos"], DefaultType + ] = DEFAULT, + format: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if missing is not DEFAULT: + kwargs["missing"] = missing + if mode is not DEFAULT: + kwargs["mode"] = mode + if nested is not DEFAULT: + kwargs["nested"] = nested + if order is not DEFAULT: + kwargs["order"] = order + if unmapped_type is not DEFAULT: + kwargs["unmapped_type"] = unmapped_type + if numeric_type is not DEFAULT: + kwargs["numeric_type"] = 
numeric_type + if format is not DEFAULT: + kwargs["format"] = format + super().__init__(kwargs) + + +class FrequentItemSetsField(AttrDict[Any]): + """ + :arg field: (required) + :arg exclude: Values to exclude. Can be regular expression strings or + arrays of strings of exact terms. + :arg include: Values to include. Can be regular expression strings or + arrays of strings of exact terms. + """ + + field: Union[str, InstrumentedField, DefaultType] + exclude: Union[str, Sequence[str], DefaultType] + include: Union[str, Sequence[str], "TermsPartition", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + exclude: Union[str, Sequence[str], DefaultType] = DEFAULT, + include: Union[ + str, Sequence[str], "TermsPartition", Dict[str, Any], DefaultType + ] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + if exclude is not DEFAULT: + kwargs["exclude"] = exclude + if include is not DEFAULT: + kwargs["include"] = include + super().__init__(kwargs) + + +class FuzzyQuery(AttrDict[Any]): + """ + :arg value: (required) Term you wish to find in the provided field. + :arg max_expansions: Maximum number of variations created. Defaults to + `50` if omitted. + :arg prefix_length: Number of beginning characters left unchanged when + creating expansions. + :arg rewrite: Number of beginning characters left unchanged when + creating expansions. Defaults to `constant_score` if omitted. + :arg transpositions: Indicates whether edits include transpositions of + two adjacent characters (for example `ab` to `ba`). Defaults to + `True` if omitted. + :arg fuzziness: Maximum edit distance allowed for matching. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + value: Union[str, float, bool, DefaultType] + max_expansions: Union[int, DefaultType] + prefix_length: Union[int, DefaultType] + rewrite: Union[str, DefaultType] + transpositions: Union[bool, DefaultType] + fuzziness: Union[str, int, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + value: Union[str, float, bool, DefaultType] = DEFAULT, + max_expansions: Union[int, DefaultType] = DEFAULT, + prefix_length: Union[int, DefaultType] = DEFAULT, + rewrite: Union[str, DefaultType] = DEFAULT, + transpositions: Union[bool, DefaultType] = DEFAULT, + fuzziness: Union[str, int, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if value is not DEFAULT: + kwargs["value"] = value + if max_expansions is not DEFAULT: + kwargs["max_expansions"] = max_expansions + if prefix_length is not DEFAULT: + kwargs["prefix_length"] = prefix_length + if rewrite is not DEFAULT: + kwargs["rewrite"] = rewrite + if transpositions is not DEFAULT: + kwargs["transpositions"] = transpositions + if fuzziness is not DEFAULT: + kwargs["fuzziness"] = fuzziness + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class GeoDistanceSort(AttrDict[Any]): + """ + :arg _field: The field to use in this query. + :arg _value: The query value for the field. 
+ :arg mode: + :arg distance_type: + :arg ignore_unmapped: + :arg order: + :arg unit: + :arg nested: + """ + + _field: Union[str, "InstrumentedField", "DefaultType"] + _value: Union[ + Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str], + Sequence[Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str]], + Dict[str, Any], + "DefaultType", + ] + mode: Union[Literal["min", "max", "sum", "avg", "median"], DefaultType] + distance_type: Union[Literal["arc", "plane"], DefaultType] + ignore_unmapped: Union[bool, DefaultType] + order: Union[Literal["asc", "desc"], DefaultType] + unit: Union[ + Literal["in", "ft", "yd", "mi", "nmi", "km", "m", "cm", "mm"], DefaultType + ] + nested: Union["NestedSortValue", Dict[str, Any], DefaultType] + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union[ + Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str], + Sequence[ + Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str] + ], + Dict[str, Any], + "DefaultType", + ] = DEFAULT, + *, + mode: Union[ + Literal["min", "max", "sum", "avg", "median"], DefaultType + ] = DEFAULT, + distance_type: Union[Literal["arc", "plane"], DefaultType] = DEFAULT, + ignore_unmapped: Union[bool, DefaultType] = DEFAULT, + order: Union[Literal["asc", "desc"], DefaultType] = DEFAULT, + unit: Union[ + Literal["in", "ft", "yd", "mi", "nmi", "km", "m", "cm", "mm"], DefaultType + ] = DEFAULT, + nested: Union["NestedSortValue", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + if mode is not DEFAULT: + kwargs["mode"] = mode + if distance_type is not DEFAULT: + kwargs["distance_type"] = distance_type + if ignore_unmapped is not DEFAULT: + kwargs["ignore_unmapped"] = ignore_unmapped + if order is not DEFAULT: + kwargs["order"] = order + if unit is not DEFAULT: + kwargs["unit"] = unit + if nested is not DEFAULT: + kwargs["nested"] = nested + super().__init__(kwargs) + + +class GeoHashLocation(AttrDict[Any]): + """ + :arg geohash: (required) + """ + + geohash: Union[str, DefaultType] + + def __init__(self, *, geohash: Union[str, DefaultType] = DEFAULT, **kwargs: Any): + if geohash is not DEFAULT: + kwargs["geohash"] = geohash + super().__init__(kwargs) + + +class GeoLinePoint(AttrDict[Any]): + """ + :arg field: (required) The name of the geo_point field. + """ + + field: Union[str, InstrumentedField, DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + super().__init__(kwargs) + + +class GeoLineSort(AttrDict[Any]): + """ + :arg field: (required) The name of the numeric field to use as the + sort key for ordering the points. 
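GeoDistanceSort above is the typed body of a `_geo_distance` sort clause. A minimal sketch, assuming a hypothetical `places` index with a `location` geo_point field; the reference point may also be a geohash (GeoHashLocation) or a `[lon, lat]` array:

    from elasticsearch.dsl import Search, types

    # Sort places by distance from central Paris, closest first.
    near_paris = types.GeoDistanceSort(
        _field="location",
        _value={"lat": 48.86, "lon": 2.35},
        order="asc",
        unit="km",
    )
    s = Search(index="places").sort({"_geo_distance": near_paris.to_dict()})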
+ """ + + field: Union[str, InstrumentedField, DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + super().__init__(kwargs) + + +class GeoPolygonPoints(AttrDict[Any]): + """ + :arg points: (required) + """ + + points: Union[ + Sequence[Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str]], + Dict[str, Any], + DefaultType, + ] + + def __init__( + self, + *, + points: Union[ + Sequence[ + Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str] + ], + Dict[str, Any], + DefaultType, + ] = DEFAULT, + **kwargs: Any, + ): + if points is not DEFAULT: + kwargs["points"] = points + super().__init__(kwargs) + + +class GeoShapeFieldQuery(AttrDict[Any]): + """ + :arg shape: + :arg indexed_shape: Query using an indexed shape retrieved from the + the specified document and path. + :arg relation: Spatial relation operator used to search a geo field. + Defaults to `intersects` if omitted. + """ + + shape: Any + indexed_shape: Union["FieldLookup", Dict[str, Any], DefaultType] + relation: Union[ + Literal["intersects", "disjoint", "within", "contains"], DefaultType + ] + + def __init__( + self, + *, + shape: Any = DEFAULT, + indexed_shape: Union["FieldLookup", Dict[str, Any], DefaultType] = DEFAULT, + relation: Union[ + Literal["intersects", "disjoint", "within", "contains"], DefaultType + ] = DEFAULT, + **kwargs: Any, + ): + if shape is not DEFAULT: + kwargs["shape"] = shape + if indexed_shape is not DEFAULT: + kwargs["indexed_shape"] = indexed_shape + if relation is not DEFAULT: + kwargs["relation"] = relation + super().__init__(kwargs) + + +class GoogleNormalizedDistanceHeuristic(AttrDict[Any]): + """ + :arg background_is_superset: Set to `false` if you defined a custom + background filter that represents a different set of documents + that you want to compare to. + """ + + background_is_superset: Union[bool, DefaultType] + + def __init__( + self, + *, + background_is_superset: Union[bool, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if background_is_superset is not DEFAULT: + kwargs["background_is_superset"] = background_is_superset + super().__init__(kwargs) + + +class HdrMethod(AttrDict[Any]): + """ + :arg number_of_significant_value_digits: Specifies the resolution of + values for the histogram in number of significant digits. + """ + + number_of_significant_value_digits: Union[int, DefaultType] + + def __init__( + self, + *, + number_of_significant_value_digits: Union[int, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if number_of_significant_value_digits is not DEFAULT: + kwargs["number_of_significant_value_digits"] = ( + number_of_significant_value_digits + ) + super().__init__(kwargs) + + +class Highlight(AttrDict[Any]): + """ + :arg fields: (required) + :arg encoder: + :arg type: + :arg boundary_chars: A string that contains each boundary character. + Defaults to `.,!? \t\n` if omitted. + :arg boundary_max_scan: How far to scan for boundary characters. + Defaults to `20` if omitted. + :arg boundary_scanner: Specifies how to break the highlighted + fragments: chars, sentence, or word. Only valid for the unified + and fvh highlighters. Defaults to `sentence` for the `unified` + highlighter. Defaults to `chars` for the `fvh` highlighter. + :arg boundary_scanner_locale: Controls which locale is used to search + for sentence and word boundaries. 
This parameter takes the form of a + language tag, for example: `"en-US"`, `"fr-FR"`, `"ja-JP"`. + Defaults to `Locale.ROOT` if omitted. + :arg force_source: + :arg fragmenter: Specifies how text should be broken up in highlight + snippets: `simple` or `span`. Only valid for the `plain` + highlighter. Defaults to `span` if omitted. + :arg fragment_size: The size of the highlighted fragment in + characters. Defaults to `100` if omitted. + :arg highlight_filter: + :arg highlight_query: Highlight matches for a query other than the + search query. This is especially useful if you use a rescore query + because those are not taken into account by highlighting by + default. + :arg max_fragment_length: + :arg max_analyzed_offset: If set to a non-negative value, highlighting + stops at this defined maximum limit. The rest of the text is not + processed, thus not highlighted and no error is returned. The + `max_analyzed_offset` query setting does not override the + `index.highlight.max_analyzed_offset` setting, which prevails when + it’s set to a lower value than the query setting. + :arg no_match_size: The amount of text you want to return from the + beginning of the field if there are no matching fragments to + highlight. + :arg number_of_fragments: The maximum number of fragments to return. + If the number of fragments is set to `0`, no fragments are + returned. Instead, the entire field contents are highlighted and + returned. This can be handy when you need to highlight short texts + such as a title or address, but fragmentation is not required. If + `number_of_fragments` is `0`, `fragment_size` is ignored. Defaults + to `5` if omitted. + :arg options: + :arg order: Sorts highlighted fragments by score when set to `score`. + By default, fragments will be output in the order they appear in + the field (order: `none`). Setting this option to `score` will + output the most relevant fragments first. Each highlighter applies + its own logic to compute relevancy scores. Defaults to `none` if + omitted. + :arg phrase_limit: Controls the number of matching phrases in a + document that are considered. Prevents the `fvh` highlighter from + analyzing too many phrases and consuming too much memory. When + using `matched_fields`, `phrase_limit` phrases per matched field + are considered. Raising the limit increases query time and + consumes more memory. Only supported by the `fvh` highlighter. + Defaults to `256` if omitted. + :arg post_tags: Use in conjunction with `pre_tags` to define the HTML + tags to use for the highlighted text. By default, highlighted text + is wrapped in `<em>` and `</em>` tags. + :arg pre_tags: Use in conjunction with `post_tags` to define the HTML + tags to use for the highlighted text. By default, highlighted text + is wrapped in `<em>` and `</em>` tags. + :arg require_field_match: By default, only fields that contain a + query match are highlighted. Set to `false` to highlight all + fields. Defaults to `True` if omitted. + :arg tags_schema: Set to `styled` to use the built-in tag schema.
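On the Search object these options are usually set through `highlight()` and `highlight_options()` rather than by instantiating Highlight directly. A short sketch, assuming a hypothetical `blog` index with a `body` text field:

    from elasticsearch.dsl import Q, Search

    s = Search(index="blog").query(Q("match", body="elasticsearch"))

    # Request-level settings (the Highlight options described above).
    s = s.highlight_options(order="score", pre_tags=["<em>"], post_tags=["</em>"])

    # Per-field settings (the HighlightField options described further below).
    s = s.highlight("body", fragment_size=100, number_of_fragments=3)

    # for hit in s.execute():
    #     print(hit.meta.highlight.body)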
+ """ + + fields: Union[ + Mapping[Union[str, InstrumentedField], "HighlightField"], + Dict[str, Any], + DefaultType, + ] + encoder: Union[Literal["default", "html"], DefaultType] + type: Union[Literal["plain", "fvh", "unified"], DefaultType] + boundary_chars: Union[str, DefaultType] + boundary_max_scan: Union[int, DefaultType] + boundary_scanner: Union[Literal["chars", "sentence", "word"], DefaultType] + boundary_scanner_locale: Union[str, DefaultType] + force_source: Union[bool, DefaultType] + fragmenter: Union[Literal["simple", "span"], DefaultType] + fragment_size: Union[int, DefaultType] + highlight_filter: Union[bool, DefaultType] + highlight_query: Union[Query, DefaultType] + max_fragment_length: Union[int, DefaultType] + max_analyzed_offset: Union[int, DefaultType] + no_match_size: Union[int, DefaultType] + number_of_fragments: Union[int, DefaultType] + options: Union[Mapping[str, Any], DefaultType] + order: Union[Literal["score"], DefaultType] + phrase_limit: Union[int, DefaultType] + post_tags: Union[Sequence[str], DefaultType] + pre_tags: Union[Sequence[str], DefaultType] + require_field_match: Union[bool, DefaultType] + tags_schema: Union[Literal["styled"], DefaultType] + + def __init__( + self, + *, + fields: Union[ + Mapping[Union[str, InstrumentedField], "HighlightField"], + Dict[str, Any], + DefaultType, + ] = DEFAULT, + encoder: Union[Literal["default", "html"], DefaultType] = DEFAULT, + type: Union[Literal["plain", "fvh", "unified"], DefaultType] = DEFAULT, + boundary_chars: Union[str, DefaultType] = DEFAULT, + boundary_max_scan: Union[int, DefaultType] = DEFAULT, + boundary_scanner: Union[ + Literal["chars", "sentence", "word"], DefaultType + ] = DEFAULT, + boundary_scanner_locale: Union[str, DefaultType] = DEFAULT, + force_source: Union[bool, DefaultType] = DEFAULT, + fragmenter: Union[Literal["simple", "span"], DefaultType] = DEFAULT, + fragment_size: Union[int, DefaultType] = DEFAULT, + highlight_filter: Union[bool, DefaultType] = DEFAULT, + highlight_query: Union[Query, DefaultType] = DEFAULT, + max_fragment_length: Union[int, DefaultType] = DEFAULT, + max_analyzed_offset: Union[int, DefaultType] = DEFAULT, + no_match_size: Union[int, DefaultType] = DEFAULT, + number_of_fragments: Union[int, DefaultType] = DEFAULT, + options: Union[Mapping[str, Any], DefaultType] = DEFAULT, + order: Union[Literal["score"], DefaultType] = DEFAULT, + phrase_limit: Union[int, DefaultType] = DEFAULT, + post_tags: Union[Sequence[str], DefaultType] = DEFAULT, + pre_tags: Union[Sequence[str], DefaultType] = DEFAULT, + require_field_match: Union[bool, DefaultType] = DEFAULT, + tags_schema: Union[Literal["styled"], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if fields is not DEFAULT: + kwargs["fields"] = str(fields) + if encoder is not DEFAULT: + kwargs["encoder"] = encoder + if type is not DEFAULT: + kwargs["type"] = type + if boundary_chars is not DEFAULT: + kwargs["boundary_chars"] = boundary_chars + if boundary_max_scan is not DEFAULT: + kwargs["boundary_max_scan"] = boundary_max_scan + if boundary_scanner is not DEFAULT: + kwargs["boundary_scanner"] = boundary_scanner + if boundary_scanner_locale is not DEFAULT: + kwargs["boundary_scanner_locale"] = boundary_scanner_locale + if force_source is not DEFAULT: + kwargs["force_source"] = force_source + if fragmenter is not DEFAULT: + kwargs["fragmenter"] = fragmenter + if fragment_size is not DEFAULT: + kwargs["fragment_size"] = fragment_size + if highlight_filter is not DEFAULT: + kwargs["highlight_filter"] = highlight_filter + if 
highlight_query is not DEFAULT: + kwargs["highlight_query"] = highlight_query + if max_fragment_length is not DEFAULT: + kwargs["max_fragment_length"] = max_fragment_length + if max_analyzed_offset is not DEFAULT: + kwargs["max_analyzed_offset"] = max_analyzed_offset + if no_match_size is not DEFAULT: + kwargs["no_match_size"] = no_match_size + if number_of_fragments is not DEFAULT: + kwargs["number_of_fragments"] = number_of_fragments + if options is not DEFAULT: + kwargs["options"] = options + if order is not DEFAULT: + kwargs["order"] = order + if phrase_limit is not DEFAULT: + kwargs["phrase_limit"] = phrase_limit + if post_tags is not DEFAULT: + kwargs["post_tags"] = post_tags + if pre_tags is not DEFAULT: + kwargs["pre_tags"] = pre_tags + if require_field_match is not DEFAULT: + kwargs["require_field_match"] = require_field_match + if tags_schema is not DEFAULT: + kwargs["tags_schema"] = tags_schema + super().__init__(kwargs) + + +class HighlightField(AttrDict[Any]): + """ + :arg fragment_offset: + :arg matched_fields: + :arg type: + :arg boundary_chars: A string that contains each boundary character. + Defaults to `.,!? \t\n` if omitted. + :arg boundary_max_scan: How far to scan for boundary characters. + Defaults to `20` if omitted. + :arg boundary_scanner: Specifies how to break the highlighted + fragments: chars, sentence, or word. Only valid for the unified + and fvh highlighters. Defaults to `sentence` for the `unified` + highlighter. Defaults to `chars` for the `fvh` highlighter. + :arg boundary_scanner_locale: Controls which locale is used to search + for sentence and word boundaries. This parameter takes a form of a + language tag, for example: `"en-US"`, `"fr-FR"`, `"ja-JP"`. + Defaults to `Locale.ROOT` if omitted. + :arg force_source: + :arg fragmenter: Specifies how text should be broken up in highlight + snippets: `simple` or `span`. Only valid for the `plain` + highlighter. Defaults to `span` if omitted. + :arg fragment_size: The size of the highlighted fragment in + characters. Defaults to `100` if omitted. + :arg highlight_filter: + :arg highlight_query: Highlight matches for a query other than the + search query. This is especially useful if you use a rescore query + because those are not taken into account by highlighting by + default. + :arg max_fragment_length: + :arg max_analyzed_offset: If set to a non-negative value, highlighting + stops at this defined maximum limit. The rest of the text is not + processed, thus not highlighted and no error is returned The + `max_analyzed_offset` query setting does not override the + `index.highlight.max_analyzed_offset` setting, which prevails when + it’s set to lower value than the query setting. + :arg no_match_size: The amount of text you want to return from the + beginning of the field if there are no matching fragments to + highlight. + :arg number_of_fragments: The maximum number of fragments to return. + If the number of fragments is set to `0`, no fragments are + returned. Instead, the entire field contents are highlighted and + returned. This can be handy when you need to highlight short texts + such as a title or address, but fragmentation is not required. If + `number_of_fragments` is `0`, `fragment_size` is ignored. Defaults + to `5` if omitted. + :arg options: + :arg order: Sorts highlighted fragments by score when set to `score`. + By default, fragments will be output in the order they appear in + the field (order: `none`). Setting this option to `score` will + output the most relevant fragments first. 
Each highlighter applies + its own logic to compute relevancy scores. Defaults to `none` if + omitted. + :arg phrase_limit: Controls the number of matching phrases in a + document that are considered. Prevents the `fvh` highlighter from + analyzing too many phrases and consuming too much memory. When + using `matched_fields`, `phrase_limit` phrases per matched field + are considered. Raising the limit increases query time and + consumes more memory. Only supported by the `fvh` highlighter. + Defaults to `256` if omitted. + :arg post_tags: Use in conjunction with `pre_tags` to define the HTML + tags to use for the highlighted text. By default, highlighted text + is wrapped in `` and `` tags. + :arg pre_tags: Use in conjunction with `post_tags` to define the HTML + tags to use for the highlighted text. By default, highlighted text + is wrapped in `` and `` tags. + :arg require_field_match: By default, only fields that contains a + query match are highlighted. Set to `false` to highlight all + fields. Defaults to `True` if omitted. + :arg tags_schema: Set to `styled` to use the built-in tag schema. + """ + + fragment_offset: Union[int, DefaultType] + matched_fields: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] + type: Union[Literal["plain", "fvh", "unified"], DefaultType] + boundary_chars: Union[str, DefaultType] + boundary_max_scan: Union[int, DefaultType] + boundary_scanner: Union[Literal["chars", "sentence", "word"], DefaultType] + boundary_scanner_locale: Union[str, DefaultType] + force_source: Union[bool, DefaultType] + fragmenter: Union[Literal["simple", "span"], DefaultType] + fragment_size: Union[int, DefaultType] + highlight_filter: Union[bool, DefaultType] + highlight_query: Union[Query, DefaultType] + max_fragment_length: Union[int, DefaultType] + max_analyzed_offset: Union[int, DefaultType] + no_match_size: Union[int, DefaultType] + number_of_fragments: Union[int, DefaultType] + options: Union[Mapping[str, Any], DefaultType] + order: Union[Literal["score"], DefaultType] + phrase_limit: Union[int, DefaultType] + post_tags: Union[Sequence[str], DefaultType] + pre_tags: Union[Sequence[str], DefaultType] + require_field_match: Union[bool, DefaultType] + tags_schema: Union[Literal["styled"], DefaultType] + + def __init__( + self, + *, + fragment_offset: Union[int, DefaultType] = DEFAULT, + matched_fields: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] = DEFAULT, + type: Union[Literal["plain", "fvh", "unified"], DefaultType] = DEFAULT, + boundary_chars: Union[str, DefaultType] = DEFAULT, + boundary_max_scan: Union[int, DefaultType] = DEFAULT, + boundary_scanner: Union[ + Literal["chars", "sentence", "word"], DefaultType + ] = DEFAULT, + boundary_scanner_locale: Union[str, DefaultType] = DEFAULT, + force_source: Union[bool, DefaultType] = DEFAULT, + fragmenter: Union[Literal["simple", "span"], DefaultType] = DEFAULT, + fragment_size: Union[int, DefaultType] = DEFAULT, + highlight_filter: Union[bool, DefaultType] = DEFAULT, + highlight_query: Union[Query, DefaultType] = DEFAULT, + max_fragment_length: Union[int, DefaultType] = DEFAULT, + max_analyzed_offset: Union[int, DefaultType] = DEFAULT, + no_match_size: Union[int, DefaultType] = DEFAULT, + number_of_fragments: Union[int, DefaultType] = DEFAULT, + options: Union[Mapping[str, Any], DefaultType] = DEFAULT, + order: Union[Literal["score"], DefaultType] = DEFAULT, + phrase_limit: Union[int, DefaultType] = DEFAULT, + post_tags: 
Union[Sequence[str], DefaultType] = DEFAULT, + pre_tags: Union[Sequence[str], DefaultType] = DEFAULT, + require_field_match: Union[bool, DefaultType] = DEFAULT, + tags_schema: Union[Literal["styled"], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if fragment_offset is not DEFAULT: + kwargs["fragment_offset"] = fragment_offset + if matched_fields is not DEFAULT: + kwargs["matched_fields"] = str(matched_fields) + if type is not DEFAULT: + kwargs["type"] = type + if boundary_chars is not DEFAULT: + kwargs["boundary_chars"] = boundary_chars + if boundary_max_scan is not DEFAULT: + kwargs["boundary_max_scan"] = boundary_max_scan + if boundary_scanner is not DEFAULT: + kwargs["boundary_scanner"] = boundary_scanner + if boundary_scanner_locale is not DEFAULT: + kwargs["boundary_scanner_locale"] = boundary_scanner_locale + if force_source is not DEFAULT: + kwargs["force_source"] = force_source + if fragmenter is not DEFAULT: + kwargs["fragmenter"] = fragmenter + if fragment_size is not DEFAULT: + kwargs["fragment_size"] = fragment_size + if highlight_filter is not DEFAULT: + kwargs["highlight_filter"] = highlight_filter + if highlight_query is not DEFAULT: + kwargs["highlight_query"] = highlight_query + if max_fragment_length is not DEFAULT: + kwargs["max_fragment_length"] = max_fragment_length + if max_analyzed_offset is not DEFAULT: + kwargs["max_analyzed_offset"] = max_analyzed_offset + if no_match_size is not DEFAULT: + kwargs["no_match_size"] = no_match_size + if number_of_fragments is not DEFAULT: + kwargs["number_of_fragments"] = number_of_fragments + if options is not DEFAULT: + kwargs["options"] = options + if order is not DEFAULT: + kwargs["order"] = order + if phrase_limit is not DEFAULT: + kwargs["phrase_limit"] = phrase_limit + if post_tags is not DEFAULT: + kwargs["post_tags"] = post_tags + if pre_tags is not DEFAULT: + kwargs["pre_tags"] = pre_tags + if require_field_match is not DEFAULT: + kwargs["require_field_match"] = require_field_match + if tags_schema is not DEFAULT: + kwargs["tags_schema"] = tags_schema + super().__init__(kwargs) + + +class HoltLinearModelSettings(AttrDict[Any]): + """ + :arg alpha: + :arg beta: + """ + + alpha: Union[float, DefaultType] + beta: Union[float, DefaultType] + + def __init__( + self, + *, + alpha: Union[float, DefaultType] = DEFAULT, + beta: Union[float, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if alpha is not DEFAULT: + kwargs["alpha"] = alpha + if beta is not DEFAULT: + kwargs["beta"] = beta + super().__init__(kwargs) + + +class HoltWintersModelSettings(AttrDict[Any]): + """ + :arg alpha: + :arg beta: + :arg gamma: + :arg pad: + :arg period: + :arg type: + """ + + alpha: Union[float, DefaultType] + beta: Union[float, DefaultType] + gamma: Union[float, DefaultType] + pad: Union[bool, DefaultType] + period: Union[int, DefaultType] + type: Union[Literal["add", "mult"], DefaultType] + + def __init__( + self, + *, + alpha: Union[float, DefaultType] = DEFAULT, + beta: Union[float, DefaultType] = DEFAULT, + gamma: Union[float, DefaultType] = DEFAULT, + pad: Union[bool, DefaultType] = DEFAULT, + period: Union[int, DefaultType] = DEFAULT, + type: Union[Literal["add", "mult"], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if alpha is not DEFAULT: + kwargs["alpha"] = alpha + if beta is not DEFAULT: + kwargs["beta"] = beta + if gamma is not DEFAULT: + kwargs["gamma"] = gamma + if pad is not DEFAULT: + kwargs["pad"] = pad + if period is not DEFAULT: + kwargs["period"] = period + if type is not DEFAULT: + kwargs["type"] = type + 
super().__init__(kwargs) + + +class InferenceConfigContainer(AttrDict[Any]): + """ + :arg regression: Regression configuration for inference. + :arg classification: Classification configuration for inference. + """ + + regression: Union["RegressionInferenceOptions", Dict[str, Any], DefaultType] + classification: Union["ClassificationInferenceOptions", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + regression: Union[ + "RegressionInferenceOptions", Dict[str, Any], DefaultType + ] = DEFAULT, + classification: Union[ + "ClassificationInferenceOptions", Dict[str, Any], DefaultType + ] = DEFAULT, + **kwargs: Any, + ): + if regression is not DEFAULT: + kwargs["regression"] = regression + if classification is not DEFAULT: + kwargs["classification"] = classification + super().__init__(kwargs) + + +class InnerHits(AttrDict[Any]): + """ + :arg name: The name for the particular inner hit definition in the + response. Useful when a search request contains multiple inner + hits. + :arg size: The maximum number of hits to return per `inner_hits`. + Defaults to `3` if omitted. + :arg from: Inner hit starting document offset. + :arg collapse: + :arg docvalue_fields: + :arg explain: + :arg highlight: + :arg ignore_unmapped: + :arg script_fields: + :arg seq_no_primary_term: + :arg fields: + :arg sort: How the inner hits should be sorted per `inner_hits`. By + default, inner hits are sorted by score. + :arg _source: + :arg stored_fields: + :arg track_scores: + :arg version: + """ + + name: Union[str, DefaultType] + size: Union[int, DefaultType] + from_: Union[int, DefaultType] + collapse: Union["FieldCollapse", Dict[str, Any], DefaultType] + docvalue_fields: Union[ + Sequence["FieldAndFormat"], Sequence[Dict[str, Any]], DefaultType + ] + explain: Union[bool, DefaultType] + highlight: Union["Highlight", Dict[str, Any], DefaultType] + ignore_unmapped: Union[bool, DefaultType] + script_fields: Union[ + Mapping[Union[str, InstrumentedField], "ScriptField"], + Dict[str, Any], + DefaultType, + ] + seq_no_primary_term: Union[bool, DefaultType] + fields: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] + sort: Union[ + Union[Union[str, InstrumentedField], "SortOptions"], + Sequence[Union[Union[str, InstrumentedField], "SortOptions"]], + Dict[str, Any], + DefaultType, + ] + _source: Union[bool, "SourceFilter", Dict[str, Any], DefaultType] + stored_fields: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] + track_scores: Union[bool, DefaultType] + version: Union[bool, DefaultType] + + def __init__( + self, + *, + name: Union[str, DefaultType] = DEFAULT, + size: Union[int, DefaultType] = DEFAULT, + from_: Union[int, DefaultType] = DEFAULT, + collapse: Union["FieldCollapse", Dict[str, Any], DefaultType] = DEFAULT, + docvalue_fields: Union[ + Sequence["FieldAndFormat"], Sequence[Dict[str, Any]], DefaultType + ] = DEFAULT, + explain: Union[bool, DefaultType] = DEFAULT, + highlight: Union["Highlight", Dict[str, Any], DefaultType] = DEFAULT, + ignore_unmapped: Union[bool, DefaultType] = DEFAULT, + script_fields: Union[ + Mapping[Union[str, InstrumentedField], "ScriptField"], + Dict[str, Any], + DefaultType, + ] = DEFAULT, + seq_no_primary_term: Union[bool, DefaultType] = DEFAULT, + fields: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] = DEFAULT, + sort: Union[ + Union[Union[str, InstrumentedField], "SortOptions"], + Sequence[Union[Union[str, 
InstrumentedField], "SortOptions"]], + Dict[str, Any], + DefaultType, + ] = DEFAULT, + _source: Union[bool, "SourceFilter", Dict[str, Any], DefaultType] = DEFAULT, + stored_fields: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] = DEFAULT, + track_scores: Union[bool, DefaultType] = DEFAULT, + version: Union[bool, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if name is not DEFAULT: + kwargs["name"] = name + if size is not DEFAULT: + kwargs["size"] = size + if from_ is not DEFAULT: + kwargs["from_"] = from_ + if collapse is not DEFAULT: + kwargs["collapse"] = collapse + if docvalue_fields is not DEFAULT: + kwargs["docvalue_fields"] = docvalue_fields + if explain is not DEFAULT: + kwargs["explain"] = explain + if highlight is not DEFAULT: + kwargs["highlight"] = highlight + if ignore_unmapped is not DEFAULT: + kwargs["ignore_unmapped"] = ignore_unmapped + if script_fields is not DEFAULT: + kwargs["script_fields"] = str(script_fields) + if seq_no_primary_term is not DEFAULT: + kwargs["seq_no_primary_term"] = seq_no_primary_term + if fields is not DEFAULT: + kwargs["fields"] = str(fields) + if sort is not DEFAULT: + kwargs["sort"] = str(sort) + if _source is not DEFAULT: + kwargs["_source"] = _source + if stored_fields is not DEFAULT: + kwargs["stored_fields"] = str(stored_fields) + if track_scores is not DEFAULT: + kwargs["track_scores"] = track_scores + if version is not DEFAULT: + kwargs["version"] = version + super().__init__(kwargs) + + +class IntervalsAllOf(AttrDict[Any]): + """ + :arg intervals: (required) An array of rules to combine. All rules + must produce a match in a document for the overall source to + match. + :arg max_gaps: Maximum number of positions between the matching terms. + Intervals produced by the rules further apart than this are not + considered matches. Defaults to `-1` if omitted. + :arg ordered: If `true`, intervals produced by the rules should appear + in the order in which they are specified. + :arg filter: Rule used to filter returned intervals. + """ + + intervals: Union[ + Sequence["IntervalsContainer"], Sequence[Dict[str, Any]], DefaultType + ] + max_gaps: Union[int, DefaultType] + ordered: Union[bool, DefaultType] + filter: Union["IntervalsFilter", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + intervals: Union[ + Sequence["IntervalsContainer"], Sequence[Dict[str, Any]], DefaultType + ] = DEFAULT, + max_gaps: Union[int, DefaultType] = DEFAULT, + ordered: Union[bool, DefaultType] = DEFAULT, + filter: Union["IntervalsFilter", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if intervals is not DEFAULT: + kwargs["intervals"] = intervals + if max_gaps is not DEFAULT: + kwargs["max_gaps"] = max_gaps + if ordered is not DEFAULT: + kwargs["ordered"] = ordered + if filter is not DEFAULT: + kwargs["filter"] = filter + super().__init__(kwargs) + + +class IntervalsAnyOf(AttrDict[Any]): + """ + :arg intervals: (required) An array of rules to match. + :arg filter: Rule used to filter returned intervals. 
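InnerHits above is commonly passed to `nested` (or `has_child`/`has_parent`) queries so that the matching inner documents are returned alongside each top-level hit. A minimal sketch, assuming a hypothetical `blog` index with a nested `comments` field:

    from elasticsearch.dsl import Q, Search, types

    q = Q(
        "nested",
        path="comments",
        query=Q("match", comments__body="great article"),
        inner_hits=types.InnerHits(name="top_comments", size=3),
    )
    s = Search(index="blog").query(q)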
+ """ + + intervals: Union[ + Sequence["IntervalsContainer"], Sequence[Dict[str, Any]], DefaultType + ] + filter: Union["IntervalsFilter", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + intervals: Union[ + Sequence["IntervalsContainer"], Sequence[Dict[str, Any]], DefaultType + ] = DEFAULT, + filter: Union["IntervalsFilter", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if intervals is not DEFAULT: + kwargs["intervals"] = intervals + if filter is not DEFAULT: + kwargs["filter"] = filter + super().__init__(kwargs) + + +class IntervalsContainer(AttrDict[Any]): + """ + :arg all_of: Returns matches that span a combination of other rules. + :arg any_of: Returns intervals produced by any of its sub-rules. + :arg fuzzy: Matches analyzed text. + :arg match: Matches analyzed text. + :arg prefix: Matches terms that start with a specified set of + characters. + :arg wildcard: Matches terms using a wildcard pattern. + """ + + all_of: Union["IntervalsAllOf", Dict[str, Any], DefaultType] + any_of: Union["IntervalsAnyOf", Dict[str, Any], DefaultType] + fuzzy: Union["IntervalsFuzzy", Dict[str, Any], DefaultType] + match: Union["IntervalsMatch", Dict[str, Any], DefaultType] + prefix: Union["IntervalsPrefix", Dict[str, Any], DefaultType] + wildcard: Union["IntervalsWildcard", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + all_of: Union["IntervalsAllOf", Dict[str, Any], DefaultType] = DEFAULT, + any_of: Union["IntervalsAnyOf", Dict[str, Any], DefaultType] = DEFAULT, + fuzzy: Union["IntervalsFuzzy", Dict[str, Any], DefaultType] = DEFAULT, + match: Union["IntervalsMatch", Dict[str, Any], DefaultType] = DEFAULT, + prefix: Union["IntervalsPrefix", Dict[str, Any], DefaultType] = DEFAULT, + wildcard: Union["IntervalsWildcard", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if all_of is not DEFAULT: + kwargs["all_of"] = all_of + if any_of is not DEFAULT: + kwargs["any_of"] = any_of + if fuzzy is not DEFAULT: + kwargs["fuzzy"] = fuzzy + if match is not DEFAULT: + kwargs["match"] = match + if prefix is not DEFAULT: + kwargs["prefix"] = prefix + if wildcard is not DEFAULT: + kwargs["wildcard"] = wildcard + super().__init__(kwargs) + + +class IntervalsFilter(AttrDict[Any]): + """ + :arg after: Query used to return intervals that follow an interval + from the `filter` rule. + :arg before: Query used to return intervals that occur before an + interval from the `filter` rule. + :arg contained_by: Query used to return intervals contained by an + interval from the `filter` rule. + :arg containing: Query used to return intervals that contain an + interval from the `filter` rule. + :arg not_contained_by: Query used to return intervals that are **not** + contained by an interval from the `filter` rule. + :arg not_containing: Query used to return intervals that do **not** + contain an interval from the `filter` rule. + :arg not_overlapping: Query used to return intervals that do **not** + overlap with an interval from the `filter` rule. + :arg overlapping: Query used to return intervals that overlap with an + interval from the `filter` rule. + :arg script: Script used to return matching documents. This script + must return a boolean value: `true` or `false`. 
+ """ + + after: Union["IntervalsContainer", Dict[str, Any], DefaultType] + before: Union["IntervalsContainer", Dict[str, Any], DefaultType] + contained_by: Union["IntervalsContainer", Dict[str, Any], DefaultType] + containing: Union["IntervalsContainer", Dict[str, Any], DefaultType] + not_contained_by: Union["IntervalsContainer", Dict[str, Any], DefaultType] + not_containing: Union["IntervalsContainer", Dict[str, Any], DefaultType] + not_overlapping: Union["IntervalsContainer", Dict[str, Any], DefaultType] + overlapping: Union["IntervalsContainer", Dict[str, Any], DefaultType] + script: Union["Script", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + after: Union["IntervalsContainer", Dict[str, Any], DefaultType] = DEFAULT, + before: Union["IntervalsContainer", Dict[str, Any], DefaultType] = DEFAULT, + contained_by: Union[ + "IntervalsContainer", Dict[str, Any], DefaultType + ] = DEFAULT, + containing: Union["IntervalsContainer", Dict[str, Any], DefaultType] = DEFAULT, + not_contained_by: Union[ + "IntervalsContainer", Dict[str, Any], DefaultType + ] = DEFAULT, + not_containing: Union[ + "IntervalsContainer", Dict[str, Any], DefaultType + ] = DEFAULT, + not_overlapping: Union[ + "IntervalsContainer", Dict[str, Any], DefaultType + ] = DEFAULT, + overlapping: Union["IntervalsContainer", Dict[str, Any], DefaultType] = DEFAULT, + script: Union["Script", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if after is not DEFAULT: + kwargs["after"] = after + if before is not DEFAULT: + kwargs["before"] = before + if contained_by is not DEFAULT: + kwargs["contained_by"] = contained_by + if containing is not DEFAULT: + kwargs["containing"] = containing + if not_contained_by is not DEFAULT: + kwargs["not_contained_by"] = not_contained_by + if not_containing is not DEFAULT: + kwargs["not_containing"] = not_containing + if not_overlapping is not DEFAULT: + kwargs["not_overlapping"] = not_overlapping + if overlapping is not DEFAULT: + kwargs["overlapping"] = overlapping + if script is not DEFAULT: + kwargs["script"] = script + super().__init__(kwargs) + + +class IntervalsFuzzy(AttrDict[Any]): + """ + :arg term: (required) The term to match. + :arg analyzer: Analyzer used to normalize the term. + :arg fuzziness: Maximum edit distance allowed for matching. Defaults + to `auto` if omitted. + :arg prefix_length: Number of beginning characters left unchanged when + creating expansions. + :arg transpositions: Indicates whether edits include transpositions of + two adjacent characters (for example, `ab` to `ba`). Defaults to + `True` if omitted. + :arg use_field: If specified, match intervals from this field rather + than the top-level field. The `term` is normalized using the + search analyzer from this field, unless `analyzer` is specified + separately. 
+ """ + + term: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + fuzziness: Union[str, int, DefaultType] + prefix_length: Union[int, DefaultType] + transpositions: Union[bool, DefaultType] + use_field: Union[str, InstrumentedField, DefaultType] + + def __init__( + self, + *, + term: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + fuzziness: Union[str, int, DefaultType] = DEFAULT, + prefix_length: Union[int, DefaultType] = DEFAULT, + transpositions: Union[bool, DefaultType] = DEFAULT, + use_field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if term is not DEFAULT: + kwargs["term"] = term + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if fuzziness is not DEFAULT: + kwargs["fuzziness"] = fuzziness + if prefix_length is not DEFAULT: + kwargs["prefix_length"] = prefix_length + if transpositions is not DEFAULT: + kwargs["transpositions"] = transpositions + if use_field is not DEFAULT: + kwargs["use_field"] = str(use_field) + super().__init__(kwargs) + + +class IntervalsMatch(AttrDict[Any]): + """ + :arg query: (required) Text you wish to find in the provided field. + :arg analyzer: Analyzer used to analyze terms in the query. + :arg max_gaps: Maximum number of positions between the matching terms. + Terms further apart than this are not considered matches. Defaults + to `-1` if omitted. + :arg ordered: If `true`, matching terms must appear in their specified + order. + :arg use_field: If specified, match intervals from this field rather + than the top-level field. The `term` is normalized using the + search analyzer from this field, unless `analyzer` is specified + separately. + :arg filter: An optional interval filter. + """ + + query: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + max_gaps: Union[int, DefaultType] + ordered: Union[bool, DefaultType] + use_field: Union[str, InstrumentedField, DefaultType] + filter: Union["IntervalsFilter", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + query: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + max_gaps: Union[int, DefaultType] = DEFAULT, + ordered: Union[bool, DefaultType] = DEFAULT, + use_field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + filter: Union["IntervalsFilter", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if query is not DEFAULT: + kwargs["query"] = query + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if max_gaps is not DEFAULT: + kwargs["max_gaps"] = max_gaps + if ordered is not DEFAULT: + kwargs["ordered"] = ordered + if use_field is not DEFAULT: + kwargs["use_field"] = str(use_field) + if filter is not DEFAULT: + kwargs["filter"] = filter + super().__init__(kwargs) + + +class IntervalsPrefix(AttrDict[Any]): + """ + :arg prefix: (required) Beginning characters of terms you wish to find + in the top-level field. + :arg analyzer: Analyzer used to analyze the `prefix`. + :arg use_field: If specified, match intervals from this field rather + than the top-level field. The `prefix` is normalized using the + search analyzer from this field, unless `analyzer` is specified + separately. 
+ """ + + prefix: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + use_field: Union[str, InstrumentedField, DefaultType] + + def __init__( + self, + *, + prefix: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + use_field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if prefix is not DEFAULT: + kwargs["prefix"] = prefix + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if use_field is not DEFAULT: + kwargs["use_field"] = str(use_field) + super().__init__(kwargs) + + +class IntervalsQuery(AttrDict[Any]): + """ + :arg all_of: Returns matches that span a combination of other rules. + :arg any_of: Returns intervals produced by any of its sub-rules. + :arg fuzzy: Matches terms that are similar to the provided term, + within an edit distance defined by `fuzziness`. + :arg match: Matches analyzed text. + :arg prefix: Matches terms that start with a specified set of + characters. + :arg wildcard: Matches terms using a wildcard pattern. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + all_of: Union["IntervalsAllOf", Dict[str, Any], DefaultType] + any_of: Union["IntervalsAnyOf", Dict[str, Any], DefaultType] + fuzzy: Union["IntervalsFuzzy", Dict[str, Any], DefaultType] + match: Union["IntervalsMatch", Dict[str, Any], DefaultType] + prefix: Union["IntervalsPrefix", Dict[str, Any], DefaultType] + wildcard: Union["IntervalsWildcard", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + all_of: Union["IntervalsAllOf", Dict[str, Any], DefaultType] = DEFAULT, + any_of: Union["IntervalsAnyOf", Dict[str, Any], DefaultType] = DEFAULT, + fuzzy: Union["IntervalsFuzzy", Dict[str, Any], DefaultType] = DEFAULT, + match: Union["IntervalsMatch", Dict[str, Any], DefaultType] = DEFAULT, + prefix: Union["IntervalsPrefix", Dict[str, Any], DefaultType] = DEFAULT, + wildcard: Union["IntervalsWildcard", Dict[str, Any], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if all_of is not DEFAULT: + kwargs["all_of"] = all_of + if any_of is not DEFAULT: + kwargs["any_of"] = any_of + if fuzzy is not DEFAULT: + kwargs["fuzzy"] = fuzzy + if match is not DEFAULT: + kwargs["match"] = match + if prefix is not DEFAULT: + kwargs["prefix"] = prefix + if wildcard is not DEFAULT: + kwargs["wildcard"] = wildcard + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class IntervalsWildcard(AttrDict[Any]): + """ + :arg pattern: (required) Wildcard pattern used to find matching terms. + :arg analyzer: Analyzer used to analyze the `pattern`. Defaults to the + top-level field's analyzer. + :arg use_field: If specified, match intervals from this field rather + than the top-level field. The `pattern` is normalized using the + search analyzer from this field, unless `analyzer` is specified + separately. 
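The intervals classes above compose into an `intervals` query. A sketch of the classic ordered-phrase example, assuming a hypothetical `my_text` field on a `blog` index:

    from elasticsearch.dsl import Q, Search, types

    # "my favorite food" followed, in order and with no gaps, by either
    # "hot water" or "cold porridge".
    rule = types.IntervalsQuery(
        all_of=types.IntervalsAllOf(
            ordered=True,
            intervals=[
                types.IntervalsContainer(
                    match=types.IntervalsMatch(query="my favorite food", max_gaps=0, ordered=True)
                ),
                types.IntervalsContainer(
                    any_of=types.IntervalsAnyOf(
                        intervals=[
                            types.IntervalsContainer(match=types.IntervalsMatch(query="hot water")),
                            types.IntervalsContainer(match=types.IntervalsMatch(query="cold porridge")),
                        ]
                    )
                ),
            ],
        )
    )
    s = Search(index="blog").query(Q("intervals", my_text=rule))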
+ """ + + pattern: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + use_field: Union[str, InstrumentedField, DefaultType] + + def __init__( + self, + *, + pattern: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + use_field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if pattern is not DEFAULT: + kwargs["pattern"] = pattern + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if use_field is not DEFAULT: + kwargs["use_field"] = str(use_field) + super().__init__(kwargs) + + +class IpRangeAggregationRange(AttrDict[Any]): + """ + :arg from: Start of the range. + :arg mask: IP range defined as a CIDR mask. + :arg to: End of the range. + """ + + from_: Union[str, None, DefaultType] + mask: Union[str, DefaultType] + to: Union[str, None, DefaultType] + + def __init__( + self, + *, + from_: Union[str, None, DefaultType] = DEFAULT, + mask: Union[str, DefaultType] = DEFAULT, + to: Union[str, None, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if from_ is not DEFAULT: + kwargs["from_"] = from_ + if mask is not DEFAULT: + kwargs["mask"] = mask + if to is not DEFAULT: + kwargs["to"] = to + super().__init__(kwargs) + + +class LatLonGeoLocation(AttrDict[Any]): + """ + :arg lat: (required) Latitude + :arg lon: (required) Longitude + """ + + lat: Union[float, DefaultType] + lon: Union[float, DefaultType] + + def __init__( + self, + *, + lat: Union[float, DefaultType] = DEFAULT, + lon: Union[float, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if lat is not DEFAULT: + kwargs["lat"] = lat + if lon is not DEFAULT: + kwargs["lon"] = lon + super().__init__(kwargs) + + +class LikeDocument(AttrDict[Any]): + """ + :arg doc: A document not present in the index. + :arg fields: + :arg _id: ID of a document. + :arg _index: Index of a document. + :arg per_field_analyzer: Overrides the default analyzer. + :arg routing: + :arg version: + :arg version_type: Defaults to `'internal'` if omitted. 
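IpRangeAggregationRange above describes one bucket of an `ip_range` aggregation (a `from`/`to` bound or a CIDR `mask`). A small sketch, assuming a hypothetical `weblogs` index with a `clientip` field:

    from elasticsearch.dsl import Search

    s = Search(index="weblogs")
    s.aggs.bucket(
        "ip_buckets",
        "ip_range",
        field="clientip",
        ranges=[{"to": "10.0.0.128"}, {"from": "10.0.0.128"}],
    )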
+ """ + + doc: Any + fields: Union[Sequence[Union[str, InstrumentedField]], DefaultType] + _id: Union[str, DefaultType] + _index: Union[str, DefaultType] + per_field_analyzer: Union[Mapping[Union[str, InstrumentedField], str], DefaultType] + routing: Union[str, DefaultType] + version: Union[int, DefaultType] + version_type: Union[ + Literal["internal", "external", "external_gte", "force"], DefaultType + ] + + def __init__( + self, + *, + doc: Any = DEFAULT, + fields: Union[Sequence[Union[str, InstrumentedField]], DefaultType] = DEFAULT, + _id: Union[str, DefaultType] = DEFAULT, + _index: Union[str, DefaultType] = DEFAULT, + per_field_analyzer: Union[ + Mapping[Union[str, InstrumentedField], str], DefaultType + ] = DEFAULT, + routing: Union[str, DefaultType] = DEFAULT, + version: Union[int, DefaultType] = DEFAULT, + version_type: Union[ + Literal["internal", "external", "external_gte", "force"], DefaultType + ] = DEFAULT, + **kwargs: Any, + ): + if doc is not DEFAULT: + kwargs["doc"] = doc + if fields is not DEFAULT: + kwargs["fields"] = str(fields) + if _id is not DEFAULT: + kwargs["_id"] = _id + if _index is not DEFAULT: + kwargs["_index"] = _index + if per_field_analyzer is not DEFAULT: + kwargs["per_field_analyzer"] = str(per_field_analyzer) + if routing is not DEFAULT: + kwargs["routing"] = routing + if version is not DEFAULT: + kwargs["version"] = version + if version_type is not DEFAULT: + kwargs["version_type"] = version_type + super().__init__(kwargs) + + +class MatchBoolPrefixQuery(AttrDict[Any]): + """ + :arg query: (required) Terms you wish to find in the provided field. + The last term is used in a prefix query. + :arg analyzer: Analyzer used to convert the text in the query value + into tokens. + :arg fuzziness: Maximum edit distance allowed for matching. Can be + applied to the term subqueries constructed for all terms but the + final term. + :arg fuzzy_rewrite: Method used to rewrite the query. Can be applied + to the term subqueries constructed for all terms but the final + term. + :arg fuzzy_transpositions: If `true`, edits for fuzzy matching include + transpositions of two adjacent characters (for example, `ab` to + `ba`). Can be applied to the term subqueries constructed for all + terms but the final term. Defaults to `True` if omitted. + :arg max_expansions: Maximum number of terms to which the query will + expand. Can be applied to the term subqueries constructed for all + terms but the final term. Defaults to `50` if omitted. + :arg minimum_should_match: Minimum number of clauses that must match + for a document to be returned. Applied to the constructed bool + query. + :arg operator: Boolean logic used to interpret text in the query + value. Applied to the constructed bool query. Defaults to `'or'` + if omitted. + :arg prefix_length: Number of beginning characters left unchanged for + fuzzy matching. Can be applied to the term subqueries constructed + for all terms but the final term. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
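LikeDocument above lets a `more_like_this` query reference documents without repeating their text inline. A sketch mixing free text with an already-indexed document, using a hypothetical `blog` index:

    from elasticsearch.dsl import Q, Search, types

    mlt = Q(
        "more_like_this",
        fields=["title", "body"],
        like=[
            "distributed search engines",
            types.LikeDocument(_index="blog", _id="1"),
        ],
        min_term_freq=1,
        max_query_terms=12,
    )
    s = Search(index="blog").query(mlt)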
+ :arg _name: + """ + + query: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + fuzziness: Union[str, int, DefaultType] + fuzzy_rewrite: Union[str, DefaultType] + fuzzy_transpositions: Union[bool, DefaultType] + max_expansions: Union[int, DefaultType] + minimum_should_match: Union[int, str, DefaultType] + operator: Union[Literal["and", "or"], DefaultType] + prefix_length: Union[int, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + query: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + fuzziness: Union[str, int, DefaultType] = DEFAULT, + fuzzy_rewrite: Union[str, DefaultType] = DEFAULT, + fuzzy_transpositions: Union[bool, DefaultType] = DEFAULT, + max_expansions: Union[int, DefaultType] = DEFAULT, + minimum_should_match: Union[int, str, DefaultType] = DEFAULT, + operator: Union[Literal["and", "or"], DefaultType] = DEFAULT, + prefix_length: Union[int, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if query is not DEFAULT: + kwargs["query"] = query + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if fuzziness is not DEFAULT: + kwargs["fuzziness"] = fuzziness + if fuzzy_rewrite is not DEFAULT: + kwargs["fuzzy_rewrite"] = fuzzy_rewrite + if fuzzy_transpositions is not DEFAULT: + kwargs["fuzzy_transpositions"] = fuzzy_transpositions + if max_expansions is not DEFAULT: + kwargs["max_expansions"] = max_expansions + if minimum_should_match is not DEFAULT: + kwargs["minimum_should_match"] = minimum_should_match + if operator is not DEFAULT: + kwargs["operator"] = operator + if prefix_length is not DEFAULT: + kwargs["prefix_length"] = prefix_length + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class MatchPhrasePrefixQuery(AttrDict[Any]): + """ + :arg query: (required) Text you wish to find in the provided field. + :arg analyzer: Analyzer used to convert text in the query value into + tokens. + :arg max_expansions: Maximum number of terms to which the last + provided term of the query value will expand. Defaults to `50` if + omitted. + :arg slop: Maximum number of positions allowed between matching + tokens. + :arg zero_terms_query: Indicates whether no documents are returned if + the analyzer removes all tokens, such as when using a `stop` + filter. Defaults to `none` if omitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
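MatchBoolPrefixQuery above backs the `match_bool_prefix` query, which suits search-as-you-type input: every term must match except the last, which is treated as a prefix. A sketch with a hypothetical `title` field:

    from elasticsearch.dsl import Q, Search

    # "quick" and "brown" must match; "f" matches "fox", "forest", ...
    s = Search(index="blog").query(
        Q("match_bool_prefix", title={"query": "quick brown f", "operator": "and"})
    )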
+ :arg _name: + """ + + query: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + max_expansions: Union[int, DefaultType] + slop: Union[int, DefaultType] + zero_terms_query: Union[Literal["all", "none"], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + query: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + max_expansions: Union[int, DefaultType] = DEFAULT, + slop: Union[int, DefaultType] = DEFAULT, + zero_terms_query: Union[Literal["all", "none"], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if query is not DEFAULT: + kwargs["query"] = query + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if max_expansions is not DEFAULT: + kwargs["max_expansions"] = max_expansions + if slop is not DEFAULT: + kwargs["slop"] = slop + if zero_terms_query is not DEFAULT: + kwargs["zero_terms_query"] = zero_terms_query + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class MatchPhraseQuery(AttrDict[Any]): + """ + :arg query: (required) Query terms that are analyzed and turned into a + phrase query. + :arg analyzer: Analyzer used to convert the text in the query value + into tokens. + :arg slop: Maximum number of positions allowed between matching + tokens. + :arg zero_terms_query: Indicates whether no documents are returned if + the `analyzer` removes all tokens, such as when using a `stop` + filter. Defaults to `'none'` if omitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + query: Union[str, DefaultType] + analyzer: Union[str, DefaultType] + slop: Union[int, DefaultType] + zero_terms_query: Union[Literal["all", "none"], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + query: Union[str, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + slop: Union[int, DefaultType] = DEFAULT, + zero_terms_query: Union[Literal["all", "none"], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if query is not DEFAULT: + kwargs["query"] = query + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if slop is not DEFAULT: + kwargs["slop"] = slop + if zero_terms_query is not DEFAULT: + kwargs["zero_terms_query"] = zero_terms_query + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class MatchQuery(AttrDict[Any]): + """ + :arg query: (required) Text, number, boolean value or date you wish to + find in the provided field. + :arg analyzer: Analyzer used to convert the text in the query value + into tokens. + :arg auto_generate_synonyms_phrase_query: If `true`, match phrase + queries are automatically created for multi-term synonyms. + Defaults to `True` if omitted. + :arg cutoff_frequency: + :arg fuzziness: Maximum edit distance allowed for matching. + :arg fuzzy_rewrite: Method used to rewrite the query. 
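MatchPhraseQuery above is the per-field body of a `match_phrase` query; `slop` permits a limited amount of token movement. A sketch with a hypothetical `title` field:

    from elasticsearch.dsl import Q, Search

    # "quick fox" still matches "quick brown fox" because slop=1 allows one
    # position of movement.
    s = Search(index="blog").query(
        Q("match_phrase", title={"query": "quick fox", "slop": 1})
    )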
+ :arg fuzzy_transpositions: If `true`, edits for fuzzy matching include + transpositions of two adjacent characters (for example, `ab` to + `ba`). Defaults to `True` if omitted. + :arg lenient: If `true`, format-based errors, such as providing a text + query value for a numeric field, are ignored. + :arg max_expansions: Maximum number of terms to which the query will + expand. Defaults to `50` if omitted. + :arg minimum_should_match: Minimum number of clauses that must match + for a document to be returned. + :arg operator: Boolean logic used to interpret text in the query + value. Defaults to `'or'` if omitted. + :arg prefix_length: Number of beginning characters left unchanged for + fuzzy matching. + :arg zero_terms_query: Indicates whether no documents are returned if + the `analyzer` removes all tokens, such as when using a `stop` + filter. Defaults to `'none'` if omitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + query: Union[str, float, bool, DefaultType] + analyzer: Union[str, DefaultType] + auto_generate_synonyms_phrase_query: Union[bool, DefaultType] + cutoff_frequency: Union[float, DefaultType] + fuzziness: Union[str, int, DefaultType] + fuzzy_rewrite: Union[str, DefaultType] + fuzzy_transpositions: Union[bool, DefaultType] + lenient: Union[bool, DefaultType] + max_expansions: Union[int, DefaultType] + minimum_should_match: Union[int, str, DefaultType] + operator: Union[Literal["and", "or"], DefaultType] + prefix_length: Union[int, DefaultType] + zero_terms_query: Union[Literal["all", "none"], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + query: Union[str, float, bool, DefaultType] = DEFAULT, + analyzer: Union[str, DefaultType] = DEFAULT, + auto_generate_synonyms_phrase_query: Union[bool, DefaultType] = DEFAULT, + cutoff_frequency: Union[float, DefaultType] = DEFAULT, + fuzziness: Union[str, int, DefaultType] = DEFAULT, + fuzzy_rewrite: Union[str, DefaultType] = DEFAULT, + fuzzy_transpositions: Union[bool, DefaultType] = DEFAULT, + lenient: Union[bool, DefaultType] = DEFAULT, + max_expansions: Union[int, DefaultType] = DEFAULT, + minimum_should_match: Union[int, str, DefaultType] = DEFAULT, + operator: Union[Literal["and", "or"], DefaultType] = DEFAULT, + prefix_length: Union[int, DefaultType] = DEFAULT, + zero_terms_query: Union[Literal["all", "none"], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if query is not DEFAULT: + kwargs["query"] = query + if analyzer is not DEFAULT: + kwargs["analyzer"] = analyzer + if auto_generate_synonyms_phrase_query is not DEFAULT: + kwargs["auto_generate_synonyms_phrase_query"] = ( + auto_generate_synonyms_phrase_query + ) + if cutoff_frequency is not DEFAULT: + kwargs["cutoff_frequency"] = cutoff_frequency + if fuzziness is not DEFAULT: + kwargs["fuzziness"] = fuzziness + if fuzzy_rewrite is not DEFAULT: + kwargs["fuzzy_rewrite"] = fuzzy_rewrite + if fuzzy_transpositions is not DEFAULT: + kwargs["fuzzy_transpositions"] = fuzzy_transpositions + if lenient is not DEFAULT: + kwargs["lenient"] = lenient + if max_expansions is not DEFAULT: + kwargs["max_expansions"] = max_expansions + if 
minimum_should_match is not DEFAULT: + kwargs["minimum_should_match"] = minimum_should_match + if operator is not DEFAULT: + kwargs["operator"] = operator + if prefix_length is not DEFAULT: + kwargs["prefix_length"] = prefix_length + if zero_terms_query is not DEFAULT: + kwargs["zero_terms_query"] = zero_terms_query + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class MultiTermLookup(AttrDict[Any]): + """ + :arg field: (required) A fields from which to retrieve terms. + :arg missing: The value to apply to documents that do not have a + value. By default, documents without a value are ignored. + """ + + field: Union[str, InstrumentedField, DefaultType] + missing: Union[str, int, float, bool, DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + missing: Union[str, int, float, bool, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + if missing is not DEFAULT: + kwargs["missing"] = missing + super().__init__(kwargs) + + +class MutualInformationHeuristic(AttrDict[Any]): + """ + :arg background_is_superset: Set to `false` if you defined a custom + background filter that represents a different set of documents + that you want to compare to. + :arg include_negatives: Set to `false` to filter out the terms that + appear less often in the subset than in documents outside the + subset. + """ + + background_is_superset: Union[bool, DefaultType] + include_negatives: Union[bool, DefaultType] + + def __init__( + self, + *, + background_is_superset: Union[bool, DefaultType] = DEFAULT, + include_negatives: Union[bool, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if background_is_superset is not DEFAULT: + kwargs["background_is_superset"] = background_is_superset + if include_negatives is not DEFAULT: + kwargs["include_negatives"] = include_negatives + super().__init__(kwargs) + + +class NestedSortValue(AttrDict[Any]): + """ + :arg path: (required) + :arg filter: + :arg max_children: + :arg nested: + """ + + path: Union[str, InstrumentedField, DefaultType] + filter: Union[Query, DefaultType] + max_children: Union[int, DefaultType] + nested: Union["NestedSortValue", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + path: Union[str, InstrumentedField, DefaultType] = DEFAULT, + filter: Union[Query, DefaultType] = DEFAULT, + max_children: Union[int, DefaultType] = DEFAULT, + nested: Union["NestedSortValue", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if path is not DEFAULT: + kwargs["path"] = str(path) + if filter is not DEFAULT: + kwargs["filter"] = filter + if max_children is not DEFAULT: + kwargs["max_children"] = max_children + if nested is not DEFAULT: + kwargs["nested"] = nested + super().__init__(kwargs) + + +class PercentageScoreHeuristic(AttrDict[Any]): + pass + + +class PinnedDoc(AttrDict[Any]): + """ + :arg _id: (required) The unique document ID. + :arg _index: (required) The index that contains the document. 
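As a small illustration of the simpler option classes, PinnedDoc instances are meant to be collected into a list and handed to the docs argument of a pinned query (the ids and index name below are placeholders); as with the other classes here, only explicitly passed keys are stored.

    from elasticsearch.dsl import types

    # Placeholder document ids and index name.
    pinned = [
        types.PinnedDoc(_id="1", _index="my-index"),
        types.PinnedDoc(_id="4", _index="my-index"),
    ]
    # Each entry holds exactly {"_id": ..., "_index": ...}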
+ """ + + _id: Union[str, DefaultType] + _index: Union[str, DefaultType] + + def __init__( + self, + *, + _id: Union[str, DefaultType] = DEFAULT, + _index: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if _id is not DEFAULT: + kwargs["_id"] = _id + if _index is not DEFAULT: + kwargs["_index"] = _index + super().__init__(kwargs) + + +class PrefixQuery(AttrDict[Any]): + """ + :arg value: (required) Beginning characters of terms you wish to find + in the provided field. + :arg rewrite: Method used to rewrite the query. + :arg case_insensitive: Allows ASCII case insensitive matching of the + value with the indexed field values when set to `true`. Default is + `false` which means the case sensitivity of matching depends on + the underlying field’s mapping. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + value: Union[str, DefaultType] + rewrite: Union[str, DefaultType] + case_insensitive: Union[bool, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + value: Union[str, DefaultType] = DEFAULT, + rewrite: Union[str, DefaultType] = DEFAULT, + case_insensitive: Union[bool, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if value is not DEFAULT: + kwargs["value"] = value + if rewrite is not DEFAULT: + kwargs["rewrite"] = rewrite + if case_insensitive is not DEFAULT: + kwargs["case_insensitive"] = case_insensitive + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class QueryVectorBuilder(AttrDict[Any]): + """ + :arg text_embedding: + """ + + text_embedding: Union["TextEmbedding", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + text_embedding: Union["TextEmbedding", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if text_embedding is not DEFAULT: + kwargs["text_embedding"] = text_embedding + super().__init__(kwargs) + + +class RankFeatureFunctionLinear(AttrDict[Any]): + pass + + +class RankFeatureFunctionLogarithm(AttrDict[Any]): + """ + :arg scaling_factor: (required) Configurable scaling factor. + """ + + scaling_factor: Union[float, DefaultType] + + def __init__( + self, *, scaling_factor: Union[float, DefaultType] = DEFAULT, **kwargs: Any + ): + if scaling_factor is not DEFAULT: + kwargs["scaling_factor"] = scaling_factor + super().__init__(kwargs) + + +class RankFeatureFunctionSaturation(AttrDict[Any]): + """ + :arg pivot: Configurable pivot value so that the result will be less + than 0.5. + """ + + pivot: Union[float, DefaultType] + + def __init__(self, *, pivot: Union[float, DefaultType] = DEFAULT, **kwargs: Any): + if pivot is not DEFAULT: + kwargs["pivot"] = pivot + super().__init__(kwargs) + + +class RankFeatureFunctionSigmoid(AttrDict[Any]): + """ + :arg pivot: (required) Configurable pivot value so that the result + will be less than 0.5. + :arg exponent: (required) Configurable Exponent. 
+ """ + + pivot: Union[float, DefaultType] + exponent: Union[float, DefaultType] + + def __init__( + self, + *, + pivot: Union[float, DefaultType] = DEFAULT, + exponent: Union[float, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if pivot is not DEFAULT: + kwargs["pivot"] = pivot + if exponent is not DEFAULT: + kwargs["exponent"] = exponent + super().__init__(kwargs) + + +class RegexpQuery(AttrDict[Any]): + """ + :arg value: (required) Regular expression for terms you wish to find + in the provided field. + :arg case_insensitive: Allows case insensitive matching of the regular + expression value with the indexed field values when set to `true`. + When `false`, case sensitivity of matching depends on the + underlying field’s mapping. + :arg flags: Enables optional operators for the regular expression. + :arg max_determinized_states: Maximum number of automaton states + required for the query. Defaults to `10000` if omitted. + :arg rewrite: Method used to rewrite the query. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + value: Union[str, DefaultType] + case_insensitive: Union[bool, DefaultType] + flags: Union[str, DefaultType] + max_determinized_states: Union[int, DefaultType] + rewrite: Union[str, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + value: Union[str, DefaultType] = DEFAULT, + case_insensitive: Union[bool, DefaultType] = DEFAULT, + flags: Union[str, DefaultType] = DEFAULT, + max_determinized_states: Union[int, DefaultType] = DEFAULT, + rewrite: Union[str, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if value is not DEFAULT: + kwargs["value"] = value + if case_insensitive is not DEFAULT: + kwargs["case_insensitive"] = case_insensitive + if flags is not DEFAULT: + kwargs["flags"] = flags + if max_determinized_states is not DEFAULT: + kwargs["max_determinized_states"] = max_determinized_states + if rewrite is not DEFAULT: + kwargs["rewrite"] = rewrite + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class RegressionInferenceOptions(AttrDict[Any]): + """ + :arg results_field: The field that is added to incoming documents to + contain the inference prediction. Defaults to predicted_value. + :arg num_top_feature_importance_values: Specifies the maximum number + of feature importance values per document. 
+ """ + + results_field: Union[str, InstrumentedField, DefaultType] + num_top_feature_importance_values: Union[int, DefaultType] + + def __init__( + self, + *, + results_field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + num_top_feature_importance_values: Union[int, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if results_field is not DEFAULT: + kwargs["results_field"] = str(results_field) + if num_top_feature_importance_values is not DEFAULT: + kwargs["num_top_feature_importance_values"] = ( + num_top_feature_importance_values + ) + super().__init__(kwargs) + + +class ScoreSort(AttrDict[Any]): + """ + :arg order: + """ + + order: Union[Literal["asc", "desc"], DefaultType] + + def __init__( + self, + *, + order: Union[Literal["asc", "desc"], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if order is not DEFAULT: + kwargs["order"] = order + super().__init__(kwargs) + + +class Script(AttrDict[Any]): + """ + :arg source: The script source. + :arg id: The `id` for a stored script. + :arg params: Specifies any named parameters that are passed into the + script as variables. Use parameters instead of hard-coded values + to decrease compile time. + :arg lang: Specifies the language the script is written in. Defaults + to `painless` if omitted. + :arg options: + """ + + source: Union[str, DefaultType] + id: Union[str, DefaultType] + params: Union[Mapping[str, Any], DefaultType] + lang: Union[Literal["painless", "expression", "mustache", "java"], DefaultType] + options: Union[Mapping[str, str], DefaultType] + + def __init__( + self, + *, + source: Union[str, DefaultType] = DEFAULT, + id: Union[str, DefaultType] = DEFAULT, + params: Union[Mapping[str, Any], DefaultType] = DEFAULT, + lang: Union[ + Literal["painless", "expression", "mustache", "java"], DefaultType + ] = DEFAULT, + options: Union[Mapping[str, str], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if source is not DEFAULT: + kwargs["source"] = source + if id is not DEFAULT: + kwargs["id"] = id + if params is not DEFAULT: + kwargs["params"] = params + if lang is not DEFAULT: + kwargs["lang"] = lang + if options is not DEFAULT: + kwargs["options"] = options + super().__init__(kwargs) + + +class ScriptField(AttrDict[Any]): + """ + :arg script: (required) + :arg ignore_failure: + """ + + script: Union["Script", Dict[str, Any], DefaultType] + ignore_failure: Union[bool, DefaultType] + + def __init__( + self, + *, + script: Union["Script", Dict[str, Any], DefaultType] = DEFAULT, + ignore_failure: Union[bool, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if script is not DEFAULT: + kwargs["script"] = script + if ignore_failure is not DEFAULT: + kwargs["ignore_failure"] = ignore_failure + super().__init__(kwargs) + + +class ScriptSort(AttrDict[Any]): + """ + :arg script: (required) + :arg order: + :arg type: + :arg mode: + :arg nested: + """ + + script: Union["Script", Dict[str, Any], DefaultType] + order: Union[Literal["asc", "desc"], DefaultType] + type: Union[Literal["string", "number", "version"], DefaultType] + mode: Union[Literal["min", "max", "sum", "avg", "median"], DefaultType] + nested: Union["NestedSortValue", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + script: Union["Script", Dict[str, Any], DefaultType] = DEFAULT, + order: Union[Literal["asc", "desc"], DefaultType] = DEFAULT, + type: Union[Literal["string", "number", "version"], DefaultType] = DEFAULT, + mode: Union[ + Literal["min", "max", "sum", "avg", "median"], DefaultType + ] = DEFAULT, + nested: Union["NestedSortValue", Dict[str, Any], 
DefaultType] = DEFAULT, + **kwargs: Any, + ): + if script is not DEFAULT: + kwargs["script"] = script + if order is not DEFAULT: + kwargs["order"] = order + if type is not DEFAULT: + kwargs["type"] = type + if mode is not DEFAULT: + kwargs["mode"] = mode + if nested is not DEFAULT: + kwargs["nested"] = nested + super().__init__(kwargs) + + +class ScriptedHeuristic(AttrDict[Any]): + """ + :arg script: (required) + """ + + script: Union["Script", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + script: Union["Script", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if script is not DEFAULT: + kwargs["script"] = script + super().__init__(kwargs) + + +class ShapeFieldQuery(AttrDict[Any]): + """ + :arg indexed_shape: Queries using a pre-indexed shape. + :arg relation: Spatial relation between the query shape and the + document shape. + :arg shape: Queries using an inline shape definition in GeoJSON or + Well Known Text (WKT) format. + """ + + indexed_shape: Union["FieldLookup", Dict[str, Any], DefaultType] + relation: Union[ + Literal["intersects", "disjoint", "within", "contains"], DefaultType + ] + shape: Any + + def __init__( + self, + *, + indexed_shape: Union["FieldLookup", Dict[str, Any], DefaultType] = DEFAULT, + relation: Union[ + Literal["intersects", "disjoint", "within", "contains"], DefaultType + ] = DEFAULT, + shape: Any = DEFAULT, + **kwargs: Any, + ): + if indexed_shape is not DEFAULT: + kwargs["indexed_shape"] = indexed_shape + if relation is not DEFAULT: + kwargs["relation"] = relation + if shape is not DEFAULT: + kwargs["shape"] = shape + super().__init__(kwargs) + + +class SortOptions(AttrDict[Any]): + """ + :arg _field: The field to use in this query. + :arg _value: The query value for the field. + :arg _score: + :arg _doc: + :arg _geo_distance: + :arg _script: + """ + + _field: Union[str, "InstrumentedField", "DefaultType"] + _value: Union["FieldSort", Dict[str, Any], "DefaultType"] + _score: Union["ScoreSort", Dict[str, Any], DefaultType] + _doc: Union["ScoreSort", Dict[str, Any], DefaultType] + _geo_distance: Union["GeoDistanceSort", Dict[str, Any], DefaultType] + _script: Union["ScriptSort", Dict[str, Any], DefaultType] + + def __init__( + self, + _field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT, + _value: Union["FieldSort", Dict[str, Any], "DefaultType"] = DEFAULT, + *, + _score: Union["ScoreSort", Dict[str, Any], DefaultType] = DEFAULT, + _doc: Union["ScoreSort", Dict[str, Any], DefaultType] = DEFAULT, + _geo_distance: Union["GeoDistanceSort", Dict[str, Any], DefaultType] = DEFAULT, + _script: Union["ScriptSort", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if _field is not DEFAULT: + kwargs[str(_field)] = _value + if _score is not DEFAULT: + kwargs["_score"] = _score + if _doc is not DEFAULT: + kwargs["_doc"] = _doc + if _geo_distance is not DEFAULT: + kwargs["_geo_distance"] = _geo_distance + if _script is not DEFAULT: + kwargs["_script"] = _script + super().__init__(kwargs) + + +class SourceFilter(AttrDict[Any]): + """ + :arg excludes: + :arg includes: + """ + + excludes: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] + includes: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] + + def __init__( + self, + *, + excludes: Union[ + Union[str, InstrumentedField], + Sequence[Union[str, InstrumentedField]], + DefaultType, + ] = DEFAULT, + includes: Union[ + Union[str, InstrumentedField], + 
Sequence[Union[str, InstrumentedField]], + DefaultType, + ] = DEFAULT, + **kwargs: Any, + ): + if excludes is not DEFAULT: + kwargs["excludes"] = str(excludes) + if includes is not DEFAULT: + kwargs["includes"] = str(includes) + super().__init__(kwargs) + + +class SpanContainingQuery(AttrDict[Any]): + """ + :arg big: (required) Can be any span query. Matching spans from `big` + that contain matches from `little` are returned. + :arg little: (required) Can be any span query. Matching spans from + `big` that contain matches from `little` are returned. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + big: Union["SpanQuery", Dict[str, Any], DefaultType] + little: Union["SpanQuery", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + big: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + little: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if big is not DEFAULT: + kwargs["big"] = big + if little is not DEFAULT: + kwargs["little"] = little + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanFieldMaskingQuery(AttrDict[Any]): + """ + :arg field: (required) + :arg query: (required) + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + field: Union[str, InstrumentedField, DefaultType] + query: Union["SpanQuery", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + query: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + if query is not DEFAULT: + kwargs["query"] = query + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanFirstQuery(AttrDict[Any]): + """ + :arg end: (required) Controls the maximum end position permitted in a + match. + :arg match: (required) Can be any other span type query. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
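A sketch of composing the span helpers (the field name and term below are placeholders); because every inner clause is typed as a union that also accepts Dict[str, Any], a plain dict can stand in for the nested SpanQuery wrapper.

    from elasticsearch.dsl import types

    # Match "kimchy" in the placeholder field "user.id" within the first 3 positions.
    first = types.SpanFirstQuery(
        end=3,
        match={"span_term": {"user.id": {"value": "kimchy"}}},  # plain-dict inner clause
    )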
+ :arg _name: + """ + + end: Union[int, DefaultType] + match: Union["SpanQuery", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + end: Union[int, DefaultType] = DEFAULT, + match: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if end is not DEFAULT: + kwargs["end"] = end + if match is not DEFAULT: + kwargs["match"] = match + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanMultiTermQuery(AttrDict[Any]): + """ + :arg match: (required) Should be a multi term query (one of + `wildcard`, `fuzzy`, `prefix`, `range`, or `regexp` query). + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + match: Union[Query, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + match: Union[Query, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if match is not DEFAULT: + kwargs["match"] = match + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanNearQuery(AttrDict[Any]): + """ + :arg clauses: (required) Array of one or more other span type queries. + :arg in_order: Controls whether matches are required to be in-order. + :arg slop: Controls the maximum number of intervening unmatched + positions permitted. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + clauses: Union[Sequence["SpanQuery"], Sequence[Dict[str, Any]], DefaultType] + in_order: Union[bool, DefaultType] + slop: Union[int, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + clauses: Union[ + Sequence["SpanQuery"], Sequence[Dict[str, Any]], DefaultType + ] = DEFAULT, + in_order: Union[bool, DefaultType] = DEFAULT, + slop: Union[int, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if clauses is not DEFAULT: + kwargs["clauses"] = clauses + if in_order is not DEFAULT: + kwargs["in_order"] = in_order + if slop is not DEFAULT: + kwargs["slop"] = slop + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanNotQuery(AttrDict[Any]): + """ + :arg exclude: (required) Span query whose matches must not overlap + those returned. + :arg include: (required) Span query whose matches are filtered. + :arg dist: The number of tokens from within the include span that + can’t have overlap with the exclude span. Equivalent to setting + both `pre` and `post`. 
+ :arg post: The number of tokens after the include span that can’t have + overlap with the exclude span. + :arg pre: The number of tokens before the include span that can’t have + overlap with the exclude span. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + exclude: Union["SpanQuery", Dict[str, Any], DefaultType] + include: Union["SpanQuery", Dict[str, Any], DefaultType] + dist: Union[int, DefaultType] + post: Union[int, DefaultType] + pre: Union[int, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + exclude: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + include: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + dist: Union[int, DefaultType] = DEFAULT, + post: Union[int, DefaultType] = DEFAULT, + pre: Union[int, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if exclude is not DEFAULT: + kwargs["exclude"] = exclude + if include is not DEFAULT: + kwargs["include"] = include + if dist is not DEFAULT: + kwargs["dist"] = dist + if post is not DEFAULT: + kwargs["post"] = post + if pre is not DEFAULT: + kwargs["pre"] = pre + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanOrQuery(AttrDict[Any]): + """ + :arg clauses: (required) Array of one or more other span type queries. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + clauses: Union[Sequence["SpanQuery"], Sequence[Dict[str, Any]], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + clauses: Union[ + Sequence["SpanQuery"], Sequence[Dict[str, Any]], DefaultType + ] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if clauses is not DEFAULT: + kwargs["clauses"] = clauses + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanQuery(AttrDict[Any]): + """ + :arg span_containing: Accepts a list of span queries, but only returns + those spans which also match a second span query. + :arg span_field_masking: Allows queries like `span_near` or `span_or` + across different fields. + :arg span_first: Accepts another span query whose matches must appear + within the first N positions of the field. + :arg span_gap: + :arg span_multi: Wraps a `term`, `range`, `prefix`, `wildcard`, + `regexp`, or `fuzzy` query. + :arg span_near: Accepts multiple span queries whose matches must be + within the specified distance of each other, and possibly in the + same order. + :arg span_not: Wraps another span query, and excludes any documents + which match that query. + :arg span_or: Combines multiple span queries and returns documents + which match any of the specified queries. 
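Similarly, a span_near clause can be assembled from the typed wrapper with plain-dict inner clauses (the field name "body" and the terms are placeholders):

    from elasticsearch.dsl import types

    near = types.SpanNearQuery(
        clauses=[
            {"span_term": {"body": {"value": "quick"}}},
            {"span_term": {"body": {"value": "fox"}}},
        ],
        slop=2,
        in_order=True,
    )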
+ :arg span_term: The equivalent of the `term` query but for use with + other span queries. + :arg span_within: The result from a single span query is returned as + long is its span falls within the spans returned by a list of + other span queries. + """ + + span_containing: Union["SpanContainingQuery", Dict[str, Any], DefaultType] + span_field_masking: Union["SpanFieldMaskingQuery", Dict[str, Any], DefaultType] + span_first: Union["SpanFirstQuery", Dict[str, Any], DefaultType] + span_gap: Union[Mapping[Union[str, InstrumentedField], int], DefaultType] + span_multi: Union["SpanMultiTermQuery", Dict[str, Any], DefaultType] + span_near: Union["SpanNearQuery", Dict[str, Any], DefaultType] + span_not: Union["SpanNotQuery", Dict[str, Any], DefaultType] + span_or: Union["SpanOrQuery", Dict[str, Any], DefaultType] + span_term: Union[ + Mapping[Union[str, InstrumentedField], "SpanTermQuery"], + Dict[str, Any], + DefaultType, + ] + span_within: Union["SpanWithinQuery", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + span_containing: Union[ + "SpanContainingQuery", Dict[str, Any], DefaultType + ] = DEFAULT, + span_field_masking: Union[ + "SpanFieldMaskingQuery", Dict[str, Any], DefaultType + ] = DEFAULT, + span_first: Union["SpanFirstQuery", Dict[str, Any], DefaultType] = DEFAULT, + span_gap: Union[ + Mapping[Union[str, InstrumentedField], int], DefaultType + ] = DEFAULT, + span_multi: Union["SpanMultiTermQuery", Dict[str, Any], DefaultType] = DEFAULT, + span_near: Union["SpanNearQuery", Dict[str, Any], DefaultType] = DEFAULT, + span_not: Union["SpanNotQuery", Dict[str, Any], DefaultType] = DEFAULT, + span_or: Union["SpanOrQuery", Dict[str, Any], DefaultType] = DEFAULT, + span_term: Union[ + Mapping[Union[str, InstrumentedField], "SpanTermQuery"], + Dict[str, Any], + DefaultType, + ] = DEFAULT, + span_within: Union["SpanWithinQuery", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if span_containing is not DEFAULT: + kwargs["span_containing"] = span_containing + if span_field_masking is not DEFAULT: + kwargs["span_field_masking"] = span_field_masking + if span_first is not DEFAULT: + kwargs["span_first"] = span_first + if span_gap is not DEFAULT: + kwargs["span_gap"] = str(span_gap) + if span_multi is not DEFAULT: + kwargs["span_multi"] = span_multi + if span_near is not DEFAULT: + kwargs["span_near"] = span_near + if span_not is not DEFAULT: + kwargs["span_not"] = span_not + if span_or is not DEFAULT: + kwargs["span_or"] = span_or + if span_term is not DEFAULT: + kwargs["span_term"] = str(span_term) + if span_within is not DEFAULT: + kwargs["span_within"] = span_within + super().__init__(kwargs) + + +class SpanTermQuery(AttrDict[Any]): + """ + :arg value: (required) + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
+ :arg _name: + """ + + value: Union[str, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + value: Union[str, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if value is not DEFAULT: + kwargs["value"] = value + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class SpanWithinQuery(AttrDict[Any]): + """ + :arg big: (required) Can be any span query. Matching spans from + `little` that are enclosed within `big` are returned. + :arg little: (required) Can be any span query. Matching spans from + `little` that are enclosed within `big` are returned. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + big: Union["SpanQuery", Dict[str, Any], DefaultType] + little: Union["SpanQuery", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + big: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + little: Union["SpanQuery", Dict[str, Any], DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if big is not DEFAULT: + kwargs["big"] = big + if little is not DEFAULT: + kwargs["little"] = little + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class TDigest(AttrDict[Any]): + """ + :arg compression: Limits the maximum number of nodes used by the + underlying TDigest algorithm to `20 * compression`, enabling + control of memory usage and approximation error. + """ + + compression: Union[int, DefaultType] + + def __init__( + self, *, compression: Union[int, DefaultType] = DEFAULT, **kwargs: Any + ): + if compression is not DEFAULT: + kwargs["compression"] = compression + super().__init__(kwargs) + + +class TermQuery(AttrDict[Any]): + """ + :arg value: (required) Term you wish to find in the provided field. + :arg case_insensitive: Allows ASCII case insensitive matching of the + value with the indexed field values when set to `true`. When + `false`, the case sensitivity of matching depends on the + underlying field’s mapping. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
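A sketch of the most common exact-match case, assuming the Search entry point defined elsewhere in this patch (the index and field names are placeholders); the typed object is passed as the per-field body of a term query.

    from elasticsearch.dsl import Search, types

    tq = types.TermQuery(value="published", case_insensitive=True)
    s = Search(index="articles").query("term", status=tq)   # placeholder index/field
    # s.to_dict() should yield roughly:
    # {"query": {"term": {"status": {"value": "published", "case_insensitive": True}}}}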
+ :arg _name: + """ + + value: Union[int, float, str, bool, None, Any, DefaultType] + case_insensitive: Union[bool, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + value: Union[int, float, str, bool, None, Any, DefaultType] = DEFAULT, + case_insensitive: Union[bool, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if value is not DEFAULT: + kwargs["value"] = value + if case_insensitive is not DEFAULT: + kwargs["case_insensitive"] = case_insensitive + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class TermsLookup(AttrDict[Any]): + """ + :arg index: (required) + :arg id: (required) + :arg path: (required) + :arg routing: + """ + + index: Union[str, DefaultType] + id: Union[str, DefaultType] + path: Union[str, InstrumentedField, DefaultType] + routing: Union[str, DefaultType] + + def __init__( + self, + *, + index: Union[str, DefaultType] = DEFAULT, + id: Union[str, DefaultType] = DEFAULT, + path: Union[str, InstrumentedField, DefaultType] = DEFAULT, + routing: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if index is not DEFAULT: + kwargs["index"] = index + if id is not DEFAULT: + kwargs["id"] = id + if path is not DEFAULT: + kwargs["path"] = str(path) + if routing is not DEFAULT: + kwargs["routing"] = routing + super().__init__(kwargs) + + +class TermsPartition(AttrDict[Any]): + """ + :arg num_partitions: (required) The number of partitions. + :arg partition: (required) The partition number for this request. + """ + + num_partitions: Union[int, DefaultType] + partition: Union[int, DefaultType] + + def __init__( + self, + *, + num_partitions: Union[int, DefaultType] = DEFAULT, + partition: Union[int, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if num_partitions is not DEFAULT: + kwargs["num_partitions"] = num_partitions + if partition is not DEFAULT: + kwargs["partition"] = partition + super().__init__(kwargs) + + +class TermsSetQuery(AttrDict[Any]): + """ + :arg terms: (required) Array of terms you wish to find in the provided + field. + :arg minimum_should_match: Specification describing number of matching + terms required to return a document. + :arg minimum_should_match_field: Numeric field containing the number + of matching terms required to return a document. + :arg minimum_should_match_script: Custom script containing the number + of matching terms required to return a document. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
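A sketch for terms_set (the field and terms below are placeholders); minimum_should_match_field is run through str(), so a document-class attribute (InstrumentedField) can be passed in place of the raw string.

    from elasticsearch.dsl import types

    tsq = types.TermsSetQuery(
        terms=["python", "rust", "go"],                  # placeholder terms
        minimum_should_match_field="required_matches",   # placeholder numeric field
    )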
+ :arg _name: + """ + + terms: Union[Sequence[str], DefaultType] + minimum_should_match: Union[int, str, DefaultType] + minimum_should_match_field: Union[str, InstrumentedField, DefaultType] + minimum_should_match_script: Union["Script", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + terms: Union[Sequence[str], DefaultType] = DEFAULT, + minimum_should_match: Union[int, str, DefaultType] = DEFAULT, + minimum_should_match_field: Union[ + str, InstrumentedField, DefaultType + ] = DEFAULT, + minimum_should_match_script: Union[ + "Script", Dict[str, Any], DefaultType + ] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if terms is not DEFAULT: + kwargs["terms"] = terms + if minimum_should_match is not DEFAULT: + kwargs["minimum_should_match"] = minimum_should_match + if minimum_should_match_field is not DEFAULT: + kwargs["minimum_should_match_field"] = str(minimum_should_match_field) + if minimum_should_match_script is not DEFAULT: + kwargs["minimum_should_match_script"] = minimum_should_match_script + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class TestPopulation(AttrDict[Any]): + """ + :arg field: (required) The field to aggregate. + :arg script: + :arg filter: A filter used to define a set of records to run unpaired + t-test on. + """ + + field: Union[str, InstrumentedField, DefaultType] + script: Union["Script", Dict[str, Any], DefaultType] + filter: Union[Query, DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + script: Union["Script", Dict[str, Any], DefaultType] = DEFAULT, + filter: Union[Query, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + if script is not DEFAULT: + kwargs["script"] = script + if filter is not DEFAULT: + kwargs["filter"] = filter + super().__init__(kwargs) + + +class TextEmbedding(AttrDict[Any]): + """ + :arg model_id: (required) + :arg model_text: (required) + """ + + model_id: Union[str, DefaultType] + model_text: Union[str, DefaultType] + + def __init__( + self, + *, + model_id: Union[str, DefaultType] = DEFAULT, + model_text: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if model_id is not DEFAULT: + kwargs["model_id"] = model_id + if model_text is not DEFAULT: + kwargs["model_text"] = model_text + super().__init__(kwargs) + + +class TextExpansionQuery(AttrDict[Any]): + """ + :arg model_id: (required) The text expansion NLP model to use + :arg model_text: (required) The query text + :arg pruning_config: Token pruning configurations + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. 
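A sketch of a text_expansion clause with pruning enabled (the model id and query text are placeholders; TokenPruningConfig is defined just below):

    from elasticsearch.dsl import types

    teq = types.TextExpansionQuery(
        model_id=".elser_model_2",                 # placeholder deployed model id
        model_text="How do avalanches form?",      # placeholder query text
        pruning_config=types.TokenPruningConfig(
            tokens_freq_ratio_threshold=5,
            tokens_weight_threshold=0.4,
        ),
    )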
+ :arg _name: + """ + + model_id: Union[str, DefaultType] + model_text: Union[str, DefaultType] + pruning_config: Union["TokenPruningConfig", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + model_id: Union[str, DefaultType] = DEFAULT, + model_text: Union[str, DefaultType] = DEFAULT, + pruning_config: Union[ + "TokenPruningConfig", Dict[str, Any], DefaultType + ] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if model_id is not DEFAULT: + kwargs["model_id"] = model_id + if model_text is not DEFAULT: + kwargs["model_text"] = model_text + if pruning_config is not DEFAULT: + kwargs["pruning_config"] = pruning_config + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class TokenPruningConfig(AttrDict[Any]): + """ + :arg tokens_freq_ratio_threshold: Tokens whose frequency is more than + this threshold times the average frequency of all tokens in the + specified field are considered outliers and pruned. Defaults to + `5` if omitted. + :arg tokens_weight_threshold: Tokens whose weight is less than this + threshold are considered nonsignificant and pruned. Defaults to + `0.4` if omitted. + :arg only_score_pruned_tokens: Whether to only score pruned tokens, vs + only scoring kept tokens. + """ + + tokens_freq_ratio_threshold: Union[int, DefaultType] + tokens_weight_threshold: Union[float, DefaultType] + only_score_pruned_tokens: Union[bool, DefaultType] + + def __init__( + self, + *, + tokens_freq_ratio_threshold: Union[int, DefaultType] = DEFAULT, + tokens_weight_threshold: Union[float, DefaultType] = DEFAULT, + only_score_pruned_tokens: Union[bool, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if tokens_freq_ratio_threshold is not DEFAULT: + kwargs["tokens_freq_ratio_threshold"] = tokens_freq_ratio_threshold + if tokens_weight_threshold is not DEFAULT: + kwargs["tokens_weight_threshold"] = tokens_weight_threshold + if only_score_pruned_tokens is not DEFAULT: + kwargs["only_score_pruned_tokens"] = only_score_pruned_tokens + super().__init__(kwargs) + + +class TopLeftBottomRightGeoBounds(AttrDict[Any]): + """ + :arg top_left: (required) + :arg bottom_right: (required) + """ + + top_left: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] + bottom_right: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] + + def __init__( + self, + *, + top_left: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] = DEFAULT, + bottom_right: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] = DEFAULT, + **kwargs: Any, + ): + if top_left is not DEFAULT: + kwargs["top_left"] = top_left + if bottom_right is not DEFAULT: + kwargs["bottom_right"] = bottom_right + super().__init__(kwargs) + + +class TopMetricsValue(AttrDict[Any]): + """ + :arg field: (required) A field to return as a metric. 
+ """ + + field: Union[str, InstrumentedField, DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + super().__init__(kwargs) + + +class TopRightBottomLeftGeoBounds(AttrDict[Any]): + """ + :arg top_right: (required) + :arg bottom_left: (required) + """ + + top_right: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] + bottom_left: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] + + def __init__( + self, + *, + top_right: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] = DEFAULT, + bottom_left: Union[ + "LatLonGeoLocation", + "GeoHashLocation", + Sequence[float], + str, + Dict[str, Any], + DefaultType, + ] = DEFAULT, + **kwargs: Any, + ): + if top_right is not DEFAULT: + kwargs["top_right"] = top_right + if bottom_left is not DEFAULT: + kwargs["bottom_left"] = bottom_left + super().__init__(kwargs) + + +class WeightedAverageValue(AttrDict[Any]): + """ + :arg field: The field from which to extract the values or weights. + :arg missing: A value or weight to use if the field is missing. + :arg script: + """ + + field: Union[str, InstrumentedField, DefaultType] + missing: Union[float, DefaultType] + script: Union["Script", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + field: Union[str, InstrumentedField, DefaultType] = DEFAULT, + missing: Union[float, DefaultType] = DEFAULT, + script: Union["Script", Dict[str, Any], DefaultType] = DEFAULT, + **kwargs: Any, + ): + if field is not DEFAULT: + kwargs["field"] = str(field) + if missing is not DEFAULT: + kwargs["missing"] = missing + if script is not DEFAULT: + kwargs["script"] = script + super().__init__(kwargs) + + +class WeightedTokensQuery(AttrDict[Any]): + """ + :arg tokens: (required) The tokens representing this query + :arg pruning_config: Token pruning configurations + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + tokens: Union[Mapping[str, float], DefaultType] + pruning_config: Union["TokenPruningConfig", Dict[str, Any], DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + tokens: Union[Mapping[str, float], DefaultType] = DEFAULT, + pruning_config: Union[ + "TokenPruningConfig", Dict[str, Any], DefaultType + ] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if tokens is not DEFAULT: + kwargs["tokens"] = tokens + if pruning_config is not DEFAULT: + kwargs["pruning_config"] = pruning_config + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class WildcardQuery(AttrDict[Any]): + """ + :arg case_insensitive: Allows case insensitive matching of the pattern + with the indexed field values when set to true. Default is false + which means the case sensitivity of matching depends on the + underlying field’s mapping. + :arg rewrite: Method used to rewrite the query. 
+ :arg value: Wildcard pattern for terms you wish to find in the + provided field. Required, when wildcard is not set. + :arg wildcard: Wildcard pattern for terms you wish to find in the + provided field. Required, when value is not set. + :arg boost: Floating point number used to decrease or increase the + relevance scores of the query. Boost values are relative to the + default value of 1.0. A boost value between 0 and 1.0 decreases + the relevance score. A value greater than 1.0 increases the + relevance score. Defaults to `1` if omitted. + :arg _name: + """ + + case_insensitive: Union[bool, DefaultType] + rewrite: Union[str, DefaultType] + value: Union[str, DefaultType] + wildcard: Union[str, DefaultType] + boost: Union[float, DefaultType] + _name: Union[str, DefaultType] + + def __init__( + self, + *, + case_insensitive: Union[bool, DefaultType] = DEFAULT, + rewrite: Union[str, DefaultType] = DEFAULT, + value: Union[str, DefaultType] = DEFAULT, + wildcard: Union[str, DefaultType] = DEFAULT, + boost: Union[float, DefaultType] = DEFAULT, + _name: Union[str, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if case_insensitive is not DEFAULT: + kwargs["case_insensitive"] = case_insensitive + if rewrite is not DEFAULT: + kwargs["rewrite"] = rewrite + if value is not DEFAULT: + kwargs["value"] = value + if wildcard is not DEFAULT: + kwargs["wildcard"] = wildcard + if boost is not DEFAULT: + kwargs["boost"] = boost + if _name is not DEFAULT: + kwargs["_name"] = _name + super().__init__(kwargs) + + +class WktGeoBounds(AttrDict[Any]): + """ + :arg wkt: (required) + """ + + wkt: Union[str, DefaultType] + + def __init__(self, *, wkt: Union[str, DefaultType] = DEFAULT, **kwargs: Any): + if wkt is not DEFAULT: + kwargs["wkt"] = wkt + super().__init__(kwargs) + + +class AdjacencyMatrixAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["AdjacencyMatrixBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "AdjacencyMatrixBucket"]: + return self.buckets # type: ignore + + +class AdjacencyMatrixBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: str + doc_count: int + + +class AggregationBreakdown(AttrDict[Any]): + """ + :arg build_aggregation: (required) + :arg build_aggregation_count: (required) + :arg build_leaf_collector: (required) + :arg build_leaf_collector_count: (required) + :arg collect: (required) + :arg collect_count: (required) + :arg initialize: (required) + :arg initialize_count: (required) + :arg reduce: (required) + :arg reduce_count: (required) + :arg post_collection: + :arg post_collection_count: + """ + + build_aggregation: int + build_aggregation_count: int + build_leaf_collector: int + build_leaf_collector_count: int + collect: int + collect_count: int + initialize: int + initialize_count: int + reduce: int + reduce_count: int + post_collection: int + post_collection_count: int + + +class AggregationProfile(AttrDict[Any]): + """ + :arg breakdown: (required) + :arg description: (required) + :arg time_in_nanos: (required) + :arg type: (required) + :arg debug: + :arg children: + """ + + breakdown: "AggregationBreakdown" + description: str + time_in_nanos: Any + type: str + debug: "AggregationProfileDebug" + children: Sequence["AggregationProfile"] + + +class AggregationProfileDebug(AttrDict[Any]): + """ + :arg segments_with_multi_valued_ords: + :arg collection_strategy: + :arg segments_with_single_valued_ords: + :arg 
total_buckets: + :arg built_buckets: + :arg result_strategy: + :arg has_filter: + :arg delegate: + :arg delegate_debug: + :arg chars_fetched: + :arg extract_count: + :arg extract_ns: + :arg values_fetched: + :arg collect_analyzed_ns: + :arg collect_analyzed_count: + :arg surviving_buckets: + :arg ordinals_collectors_used: + :arg ordinals_collectors_overhead_too_high: + :arg string_hashing_collectors_used: + :arg numeric_collectors_used: + :arg empty_collectors_used: + :arg deferred_aggregators: + :arg segments_with_doc_count_field: + :arg segments_with_deleted_docs: + :arg filters: + :arg segments_counted: + :arg segments_collected: + :arg map_reducer: + :arg brute_force_used: + :arg dynamic_pruning_attempted: + :arg dynamic_pruning_used: + :arg skipped_due_to_no_data: + """ + + segments_with_multi_valued_ords: int + collection_strategy: str + segments_with_single_valued_ords: int + total_buckets: int + built_buckets: int + result_strategy: str + has_filter: bool + delegate: str + delegate_debug: "AggregationProfileDebug" + chars_fetched: int + extract_count: int + extract_ns: int + values_fetched: int + collect_analyzed_ns: int + collect_analyzed_count: int + surviving_buckets: int + ordinals_collectors_used: int + ordinals_collectors_overhead_too_high: int + string_hashing_collectors_used: int + numeric_collectors_used: int + empty_collectors_used: int + deferred_aggregators: Sequence[str] + segments_with_doc_count_field: int + segments_with_deleted_docs: int + filters: Sequence["AggregationProfileDelegateDebugFilter"] + segments_counted: int + segments_collected: int + map_reducer: str + brute_force_used: int + dynamic_pruning_attempted: int + dynamic_pruning_used: int + skipped_due_to_no_data: int + + +class AggregationProfileDelegateDebugFilter(AttrDict[Any]): + """ + :arg results_from_metadata: + :arg query: + :arg specialized_for: + :arg segments_counted_in_constant_time: + """ + + results_from_metadata: int + query: str + specialized_for: str + segments_counted_in_constant_time: int + + +class ArrayPercentilesItem(AttrDict[Any]): + """ + :arg key: (required) + :arg value: (required) + :arg value_as_string: + """ + + key: str + value: Union[float, None] + value_as_string: str + + +class AutoDateHistogramAggregate(AttrDict[Any]): + """ + :arg interval: (required) + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + interval: str + buckets: Sequence["DateHistogramBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "DateHistogramBucket"]: + return self.buckets # type: ignore + + +class AvgAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. 
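These aggregate classes model response payloads rather than request options; they are normally produced by the response machinery, but one can be built by hand from a raw dict purely to illustrate attribute access (the numbers are made up):

    from elasticsearch.dsl import types

    avg = types.AvgAggregate({"value": 26.3, "value_as_string": "26.3"})
    assert avg.value == 26.3
    assert avg.value_as_string == "26.3"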
+ :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class BoxPlotAggregate(AttrDict[Any]): + """ + :arg min: (required) + :arg max: (required) + :arg q1: (required) + :arg q2: (required) + :arg q3: (required) + :arg lower: (required) + :arg upper: (required) + :arg min_as_string: + :arg max_as_string: + :arg q1_as_string: + :arg q2_as_string: + :arg q3_as_string: + :arg lower_as_string: + :arg upper_as_string: + :arg meta: + """ + + min: float + max: float + q1: float + q2: float + q3: float + lower: float + upper: float + min_as_string: str + max_as_string: str + q1_as_string: str + q2_as_string: str + q3_as_string: str + lower_as_string: str + upper_as_string: str + meta: Mapping[str, Any] + + +class BucketMetricValueAggregate(AttrDict[Any]): + """ + :arg keys: (required) + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg value_as_string: + :arg meta: + """ + + keys: Sequence[str] # type: ignore[assignment] + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class BulkIndexByScrollFailure(AttrDict[Any]): + """ + :arg cause: (required) + :arg id: (required) + :arg index: (required) + :arg status: (required) + :arg type: (required) + """ + + cause: "ErrorCause" + id: str + index: str + status: int + type: str + + +class CardinalityAggregate(AttrDict[Any]): + """ + :arg value: (required) + :arg meta: + """ + + value: int + meta: Mapping[str, Any] + + +class ChildrenAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class ClusterDetails(AttrDict[Any]): + """ + :arg status: (required) + :arg indices: (required) + :arg timed_out: (required) + :arg took: + :arg _shards: + :arg failures: + """ + + status: Literal["running", "successful", "partial", "skipped", "failed"] + indices: str + timed_out: bool + took: Any + _shards: "ShardStatistics" + failures: Sequence["ShardFailure"] + + +class ClusterStatistics(AttrDict[Any]): + """ + :arg skipped: (required) + :arg successful: (required) + :arg total: (required) + :arg running: (required) + :arg partial: (required) + :arg failed: (required) + :arg details: + """ + + skipped: int + successful: int + total: int + running: int + partial: int + failed: int + details: Mapping[str, "ClusterDetails"] + + +class Collector(AttrDict[Any]): + """ + :arg name: (required) + :arg reason: (required) + :arg time_in_nanos: (required) + :arg children: + """ + + name: str + reason: str + time_in_nanos: Any + children: Sequence["Collector"] + + +class CompletionSuggest(AttrDict[Any]): + """ + :arg options: (required) + :arg length: (required) + :arg offset: (required) + :arg text: (required) + """ + + options: Sequence["CompletionSuggestOption"] + length: int + offset: int + text: str + + +class CompletionSuggestOption(AttrDict[Any]): + """ + :arg text: (required) + :arg collate_match: + :arg contexts: + :arg fields: + :arg _id: + :arg _index: + :arg _routing: + :arg _score: + :arg _source: + :arg score: + """ + + text: str + collate_match: bool + contexts: Mapping[ + str, + Sequence[ + Union[ + str, Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str] + ] + ], + ] + fields: Mapping[str, Any] + _id: str + _index: str + _routing: str + _score: float + _source: Any + score: float + + +class CompositeAggregate(AttrDict[Any]): + """ + :arg after_key: + :arg buckets: (required) 
the aggregation buckets as a list + :arg meta: + """ + + after_key: Mapping[str, Union[int, float, str, bool, None, Any]] + buckets: Sequence["CompositeBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "CompositeBucket"]: + return self.buckets # type: ignore + + +class CompositeBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: Mapping[str, Union[int, float, str, bool, None, Any]] + doc_count: int + + +class CumulativeCardinalityAggregate(AttrDict[Any]): + """ + Result of the `cumulative_cardinality` aggregation + + :arg value: (required) + :arg value_as_string: + :arg meta: + """ + + value: int + value_as_string: str + meta: Mapping[str, Any] + + +class DateHistogramAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["DateHistogramBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "DateHistogramBucket"]: + return self.buckets # type: ignore + + +class DateHistogramBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg key_as_string: + """ + + key: Any + doc_count: int + key_as_string: str + + +class DateRangeAggregate(AttrDict[Any]): + """ + Result of a `date_range` aggregation. Same format as a for a `range` + aggregation: `from` and `to` in `buckets` are milliseconds since the + Epoch, represented as a floating point number. + + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["RangeBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "RangeBucket"]: + return self.buckets # type: ignore + + +class DerivativeAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. 
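For the bucket-style response aggregates defined just above (DateHistogramAggregate and friends), buckets behaves as a list of typed buckets, while the buckets_as_dict property covers the keyed response form; a hand-built fragment (made-up keys and counts) shows the navigation:

    from elasticsearch.dsl import types

    hist = types.DateHistogramAggregate({
        "buckets": [
            {"key_as_string": "2025-01-01", "key": 1735689600000, "doc_count": 3},
            {"key_as_string": "2025-01-02", "key": 1735776000000, "doc_count": 7},
        ]
    })
    for bucket in hist.buckets:
        print(bucket.key_as_string, bucket.doc_count)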
+ :arg normalized_value: + :arg normalized_value_as_string: + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + normalized_value: float + normalized_value_as_string: str + value_as_string: str + meta: Mapping[str, Any] + + +class DfsKnnProfile(AttrDict[Any]): + """ + :arg query: (required) + :arg rewrite_time: (required) + :arg collector: (required) + :arg vector_operations_count: + """ + + query: Sequence["KnnQueryProfileResult"] + rewrite_time: int + collector: Sequence["KnnCollectorResult"] + vector_operations_count: int + + +class DfsProfile(AttrDict[Any]): + """ + :arg statistics: + :arg knn: + """ + + statistics: "DfsStatisticsProfile" + knn: Sequence["DfsKnnProfile"] + + +class DfsStatisticsBreakdown(AttrDict[Any]): + """ + :arg collection_statistics: (required) + :arg collection_statistics_count: (required) + :arg create_weight: (required) + :arg create_weight_count: (required) + :arg rewrite: (required) + :arg rewrite_count: (required) + :arg term_statistics: (required) + :arg term_statistics_count: (required) + """ + + collection_statistics: int + collection_statistics_count: int + create_weight: int + create_weight_count: int + rewrite: int + rewrite_count: int + term_statistics: int + term_statistics_count: int + + +class DfsStatisticsProfile(AttrDict[Any]): + """ + :arg type: (required) + :arg description: (required) + :arg time_in_nanos: (required) + :arg breakdown: (required) + :arg time: + :arg debug: + :arg children: + """ + + type: str + description: str + time_in_nanos: Any + breakdown: "DfsStatisticsBreakdown" + time: Any + debug: Mapping[str, Any] + children: Sequence["DfsStatisticsProfile"] + + +class DoubleTermsAggregate(AttrDict[Any]): + """ + Result of a `terms` aggregation when the field is some kind of decimal + number like a float, double, or distance. + + :arg doc_count_error_upper_bound: + :arg sum_other_doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + doc_count_error_upper_bound: int + sum_other_doc_count: int + buckets: Sequence["DoubleTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "DoubleTermsBucket"]: + return self.buckets # type: ignore + + +class DoubleTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg key_as_string: + :arg doc_count_error_upper_bound: + """ + + key: float + doc_count: int + key_as_string: str + doc_count_error_upper_bound: int + + +class ErrorCause(AttrDict[Any]): + """ + Cause and details about a request failure. This class defines the + properties common to all error types. Additional details are also + provided, that depend on the error type. + + :arg type: (required) The type of error + :arg reason: A human-readable explanation of the error, in english + :arg stack_trace: The server stack trace. Present only if the + `error_trace=true` parameter was sent with the request. 
+ :arg caused_by: + :arg root_cause: + :arg suppressed: + """ + + type: str + reason: str + stack_trace: str + caused_by: "ErrorCause" + root_cause: Sequence["ErrorCause"] + suppressed: Sequence["ErrorCause"] + + +class Explanation(AttrDict[Any]): + """ + :arg description: (required) + :arg details: (required) + :arg value: (required) + """ + + description: str + details: Sequence["ExplanationDetail"] + value: float + + +class ExplanationDetail(AttrDict[Any]): + """ + :arg description: (required) + :arg value: (required) + :arg details: + """ + + description: str + value: float + details: Sequence["ExplanationDetail"] + + +class ExtendedStatsAggregate(AttrDict[Any]): + """ + :arg sum_of_squares: (required) + :arg variance: (required) + :arg variance_population: (required) + :arg variance_sampling: (required) + :arg std_deviation: (required) + :arg std_deviation_population: (required) + :arg std_deviation_sampling: (required) + :arg count: (required) + :arg min: (required) + :arg max: (required) + :arg avg: (required) + :arg sum: (required) + :arg std_deviation_bounds: + :arg sum_of_squares_as_string: + :arg variance_as_string: + :arg variance_population_as_string: + :arg variance_sampling_as_string: + :arg std_deviation_as_string: + :arg std_deviation_bounds_as_string: + :arg min_as_string: + :arg max_as_string: + :arg avg_as_string: + :arg sum_as_string: + :arg meta: + """ + + sum_of_squares: Union[float, None] + variance: Union[float, None] + variance_population: Union[float, None] + variance_sampling: Union[float, None] + std_deviation: Union[float, None] + std_deviation_population: Union[float, None] + std_deviation_sampling: Union[float, None] + count: int + min: Union[float, None] + max: Union[float, None] + avg: Union[float, None] + sum: float + std_deviation_bounds: "StandardDeviationBounds" + sum_of_squares_as_string: str + variance_as_string: str + variance_population_as_string: str + variance_sampling_as_string: str + std_deviation_as_string: str + std_deviation_bounds_as_string: "StandardDeviationBoundsAsString" + min_as_string: str + max_as_string: str + avg_as_string: str + sum_as_string: str + meta: Mapping[str, Any] + + +class ExtendedStatsBucketAggregate(AttrDict[Any]): + """ + :arg sum_of_squares: (required) + :arg variance: (required) + :arg variance_population: (required) + :arg variance_sampling: (required) + :arg std_deviation: (required) + :arg std_deviation_population: (required) + :arg std_deviation_sampling: (required) + :arg count: (required) + :arg min: (required) + :arg max: (required) + :arg avg: (required) + :arg sum: (required) + :arg std_deviation_bounds: + :arg sum_of_squares_as_string: + :arg variance_as_string: + :arg variance_population_as_string: + :arg variance_sampling_as_string: + :arg std_deviation_as_string: + :arg std_deviation_bounds_as_string: + :arg min_as_string: + :arg max_as_string: + :arg avg_as_string: + :arg sum_as_string: + :arg meta: + """ + + sum_of_squares: Union[float, None] + variance: Union[float, None] + variance_population: Union[float, None] + variance_sampling: Union[float, None] + std_deviation: Union[float, None] + std_deviation_population: Union[float, None] + std_deviation_sampling: Union[float, None] + count: int + min: Union[float, None] + max: Union[float, None] + avg: Union[float, None] + sum: float + std_deviation_bounds: "StandardDeviationBounds" + sum_of_squares_as_string: str + variance_as_string: str + variance_population_as_string: str + variance_sampling_as_string: str + std_deviation_as_string: str + 
std_deviation_bounds_as_string: "StandardDeviationBoundsAsString" + min_as_string: str + max_as_string: str + avg_as_string: str + sum_as_string: str + meta: Mapping[str, Any] + + +class FetchProfile(AttrDict[Any]): + """ + :arg type: (required) + :arg description: (required) + :arg time_in_nanos: (required) + :arg breakdown: (required) + :arg debug: + :arg children: + """ + + type: str + description: str + time_in_nanos: Any + breakdown: "FetchProfileBreakdown" + debug: "FetchProfileDebug" + children: Sequence["FetchProfile"] + + +class FetchProfileBreakdown(AttrDict[Any]): + """ + :arg load_source: + :arg load_source_count: + :arg load_stored_fields: + :arg load_stored_fields_count: + :arg next_reader: + :arg next_reader_count: + :arg process_count: + :arg process: + """ + + load_source: int + load_source_count: int + load_stored_fields: int + load_stored_fields_count: int + next_reader: int + next_reader_count: int + process_count: int + process: int + + +class FetchProfileDebug(AttrDict[Any]): + """ + :arg stored_fields: + :arg fast_path: + """ + + stored_fields: Sequence[str] + fast_path: int + + +class FilterAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class FiltersAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["FiltersBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "FiltersBucket"]: + return self.buckets # type: ignore + + +class FiltersBucket(AttrDict[Any]): + """ + :arg doc_count: (required) + """ + + doc_count: int + + +class FrequentItemSetsAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["FrequentItemSetsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "FrequentItemSetsBucket"]: + return self.buckets # type: ignore + + +class FrequentItemSetsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg support: (required) + :arg doc_count: (required) + """ + + key: Mapping[str, Sequence[str]] + support: float + doc_count: int + + +class GeoBoundsAggregate(AttrDict[Any]): + """ + :arg bounds: + :arg meta: + """ + + bounds: Union[ + "CoordsGeoBounds", + "TopLeftBottomRightGeoBounds", + "TopRightBottomLeftGeoBounds", + "WktGeoBounds", + ] + meta: Mapping[str, Any] + + +class GeoCentroidAggregate(AttrDict[Any]): + """ + :arg count: (required) + :arg location: + :arg meta: + """ + + count: int + location: Union["LatLonGeoLocation", "GeoHashLocation", Sequence[float], str] + meta: Mapping[str, Any] + + +class GeoDistanceAggregate(AttrDict[Any]): + """ + Result of a `geo_distance` aggregation. The unit for `from` and `to` + is meters by default. 
+ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["RangeBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "RangeBucket"]: + return self.buckets # type: ignore + + +class GeoHashGridAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["GeoHashGridBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "GeoHashGridBucket"]: + return self.buckets # type: ignore + + +class GeoHashGridBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: str + doc_count: int + + +class GeoHexGridAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["GeoHexGridBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "GeoHexGridBucket"]: + return self.buckets # type: ignore + + +class GeoHexGridBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: str + doc_count: int + + +class GeoLine(AttrDict[Any]): + """ + A GeoJson GeoLine. + + :arg type: (required) Always `"LineString"` + :arg coordinates: (required) Array of `[lon, lat]` coordinates + """ + + type: str + coordinates: Sequence[Sequence[float]] + + +class GeoLineAggregate(AttrDict[Any]): + """ + :arg type: (required) + :arg geometry: (required) + :arg properties: (required) + :arg meta: + """ + + type: str + geometry: "GeoLine" + properties: Any + meta: Mapping[str, Any] + + +class GeoTileGridAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["GeoTileGridBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "GeoTileGridBucket"]: + return self.buckets # type: ignore + + +class GeoTileGridBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: str + doc_count: int + + +class GlobalAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class HdrPercentileRanksAggregate(AttrDict[Any]): + """ + :arg values: (required) + :arg meta: + """ + + values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + meta: Mapping[str, Any] + + +class HdrPercentilesAggregate(AttrDict[Any]): + """ + :arg values: (required) + :arg meta: + """ + + values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + meta: Mapping[str, Any] + + +class HistogramAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["HistogramBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "HistogramBucket"]: + return self.buckets # type: ignore + + +class HistogramBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg key_as_string: + """ + + key: float + doc_count: int + key_as_string: str + + +class Hit(AttrDict[Any]): + """ + :arg index: (required) + :arg id: + :arg score: + :arg explanation: + :arg fields: + :arg highlight: + :arg inner_hits: + :arg matched_queries: + :arg nested: + :arg ignored: + :arg ignored_field_values: + :arg shard: + :arg node: + :arg routing: + :arg source: + :arg rank: + :arg seq_no: + :arg primary_term: + :arg version: + :arg sort: + 
""" + + index: str + id: str + score: Union[float, None] + explanation: "Explanation" + fields: Mapping[str, Any] + highlight: Mapping[str, Sequence[str]] + inner_hits: Mapping[str, "InnerHitsResult"] + matched_queries: Union[Sequence[str], Mapping[str, float]] + nested: "NestedIdentity" + ignored: Sequence[str] + ignored_field_values: Mapping[ + str, Sequence[Union[int, float, str, bool, None, Any]] + ] + shard: str + node: str + routing: str + source: Any + rank: int + seq_no: int + primary_term: int + version: int + sort: Sequence[Union[int, float, str, bool, None, Any]] + + +class HitsMetadata(AttrDict[Any]): + """ + :arg hits: (required) + :arg total: Total hit count information, present only if + `track_total_hits` wasn't `false` in the search request. + :arg max_score: + """ + + hits: Sequence["Hit"] + total: Union["TotalHits", int] + max_score: Union[float, None] + + +class InferenceAggregate(AttrDict[Any]): + """ + :arg value: + :arg feature_importance: + :arg top_classes: + :arg warning: + :arg meta: + """ + + value: Union[int, float, str, bool, None, Any] + feature_importance: Sequence["InferenceFeatureImportance"] + top_classes: Sequence["InferenceTopClassEntry"] + warning: str + meta: Mapping[str, Any] + + +class InferenceClassImportance(AttrDict[Any]): + """ + :arg class_name: (required) + :arg importance: (required) + """ + + class_name: str + importance: float + + +class InferenceFeatureImportance(AttrDict[Any]): + """ + :arg feature_name: (required) + :arg importance: + :arg classes: + """ + + feature_name: str + importance: float + classes: Sequence["InferenceClassImportance"] + + +class InferenceTopClassEntry(AttrDict[Any]): + """ + :arg class_name: (required) + :arg class_probability: (required) + :arg class_score: (required) + """ + + class_name: Union[int, float, str, bool, None, Any] + class_probability: float + class_score: float + + +class InnerHitsResult(AttrDict[Any]): + """ + :arg hits: (required) + """ + + hits: "HitsMetadata" + + +class IpPrefixAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["IpPrefixBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "IpPrefixBucket"]: + return self.buckets # type: ignore + + +class IpPrefixBucket(AttrDict[Any]): + """ + :arg is_ipv6: (required) + :arg key: (required) + :arg prefix_length: (required) + :arg doc_count: (required) + :arg netmask: + """ + + is_ipv6: bool + key: str + prefix_length: int + doc_count: int + netmask: str + + +class IpRangeAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["IpRangeBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "IpRangeBucket"]: + return self.buckets # type: ignore + + +class IpRangeBucket(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg key: + :arg from: + :arg to: + """ + + doc_count: int + key: str + from_: str + to: str + + +class KnnCollectorResult(AttrDict[Any]): + """ + :arg name: (required) + :arg reason: (required) + :arg time_in_nanos: (required) + :arg time: + :arg children: + """ + + name: str + reason: str + time_in_nanos: Any + time: Any + children: Sequence["KnnCollectorResult"] + + +class KnnQueryProfileBreakdown(AttrDict[Any]): + """ + :arg advance: (required) + :arg advance_count: (required) + :arg build_scorer: (required) + :arg build_scorer_count: (required) + :arg compute_max_score: (required) + :arg 
compute_max_score_count: (required)
+    :arg count_weight: (required)
+    :arg count_weight_count: (required)
+    :arg create_weight: (required)
+    :arg create_weight_count: (required)
+    :arg match: (required)
+    :arg match_count: (required)
+    :arg next_doc: (required)
+    :arg next_doc_count: (required)
+    :arg score: (required)
+    :arg score_count: (required)
+    :arg set_min_competitive_score: (required)
+    :arg set_min_competitive_score_count: (required)
+    :arg shallow_advance: (required)
+    :arg shallow_advance_count: (required)
+    """
+
+    advance: int
+    advance_count: int
+    build_scorer: int
+    build_scorer_count: int
+    compute_max_score: int
+    compute_max_score_count: int
+    count_weight: int
+    count_weight_count: int
+    create_weight: int
+    create_weight_count: int
+    match: int
+    match_count: int
+    next_doc: int
+    next_doc_count: int
+    score: int
+    score_count: int
+    set_min_competitive_score: int
+    set_min_competitive_score_count: int
+    shallow_advance: int
+    shallow_advance_count: int
+
+
+class KnnQueryProfileResult(AttrDict[Any]):
+    """
+    :arg type: (required)
+    :arg description: (required)
+    :arg time_in_nanos: (required)
+    :arg breakdown: (required)
+    :arg time:
+    :arg debug:
+    :arg children:
+    """
+
+    type: str
+    description: str
+    time_in_nanos: Any
+    breakdown: "KnnQueryProfileBreakdown"
+    time: Any
+    debug: Mapping[str, Any]
+    children: Sequence["KnnQueryProfileResult"]
+
+
+class LongRareTermsAggregate(AttrDict[Any]):
+    """
+    Result of the `rare_terms` aggregation when the field is some kind of
+    whole number like an integer, long, or a date.
+
+    :arg buckets: (required) the aggregation buckets as a list
+    :arg meta:
+    """
+
+    buckets: Sequence["LongRareTermsBucket"]
+    meta: Mapping[str, Any]
+
+    @property
+    def buckets_as_dict(self) -> Mapping[str, "LongRareTermsBucket"]:
+        return self.buckets  # type: ignore
+
+
+class LongRareTermsBucket(AttrDict[Any]):
+    """
+    :arg key: (required)
+    :arg doc_count: (required)
+    :arg key_as_string:
+    """
+
+    key: int
+    doc_count: int
+    key_as_string: str
+
+
+class LongTermsAggregate(AttrDict[Any]):
+    """
+    Result of a `terms` aggregation when the field is some kind of whole
+    number like an integer, long, or a date.
+ + :arg doc_count_error_upper_bound: + :arg sum_other_doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + doc_count_error_upper_bound: int + sum_other_doc_count: int + buckets: Sequence["LongTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "LongTermsBucket"]: + return self.buckets # type: ignore + + +class LongTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg key_as_string: + :arg doc_count_error_upper_bound: + """ + + key: int + doc_count: int + key_as_string: str + doc_count_error_upper_bound: int + + +class MatrixStatsAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg fields: + :arg meta: + """ + + doc_count: int + fields: Sequence["MatrixStatsFields"] + meta: Mapping[str, Any] + + +class MatrixStatsFields(AttrDict[Any]): + """ + :arg name: (required) + :arg count: (required) + :arg mean: (required) + :arg variance: (required) + :arg skewness: (required) + :arg kurtosis: (required) + :arg covariance: (required) + :arg correlation: (required) + """ + + name: str + count: int + mean: float + variance: float + skewness: float + kurtosis: float + covariance: Mapping[str, float] + correlation: Mapping[str, float] + + +class MaxAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class MedianAbsoluteDeviationAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class MinAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. 
+ :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class MissingAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class MultiTermsAggregate(AttrDict[Any]): + """ + :arg doc_count_error_upper_bound: + :arg sum_other_doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + doc_count_error_upper_bound: int + sum_other_doc_count: int + buckets: Sequence["MultiTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "MultiTermsBucket"]: + return self.buckets # type: ignore + + +class MultiTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg key_as_string: + :arg doc_count_error_upper_bound: + """ + + key: Sequence[Union[int, float, str, bool, None, Any]] + doc_count: int + key_as_string: str + doc_count_error_upper_bound: int + + +class NestedAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class NestedIdentity(AttrDict[Any]): + """ + :arg field: (required) + :arg offset: (required) + :arg _nested: + """ + + field: str + offset: int + _nested: "NestedIdentity" + + +class ParentAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class PercentilesBucketAggregate(AttrDict[Any]): + """ + :arg values: (required) + :arg meta: + """ + + values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + meta: Mapping[str, Any] + + +class PhraseSuggest(AttrDict[Any]): + """ + :arg options: (required) + :arg length: (required) + :arg offset: (required) + :arg text: (required) + """ + + options: Sequence["PhraseSuggestOption"] + length: int + offset: int + text: str + + +class PhraseSuggestOption(AttrDict[Any]): + """ + :arg text: (required) + :arg score: (required) + :arg highlighted: + :arg collate_match: + """ + + text: str + score: float + highlighted: str + collate_match: bool + + +class Profile(AttrDict[Any]): + """ + :arg shards: (required) + """ + + shards: Sequence["ShardProfile"] + + +class QueryBreakdown(AttrDict[Any]): + """ + :arg advance: (required) + :arg advance_count: (required) + :arg build_scorer: (required) + :arg build_scorer_count: (required) + :arg create_weight: (required) + :arg create_weight_count: (required) + :arg match: (required) + :arg match_count: (required) + :arg shallow_advance: (required) + :arg shallow_advance_count: (required) + :arg next_doc: (required) + :arg next_doc_count: (required) + :arg score: (required) + :arg score_count: (required) + :arg compute_max_score: (required) + :arg compute_max_score_count: (required) + :arg count_weight: (required) + :arg count_weight_count: (required) + :arg set_min_competitive_score: (required) + :arg set_min_competitive_score_count: (required) + """ + + advance: int + advance_count: int + build_scorer: int + build_scorer_count: int + create_weight: int + create_weight_count: int + match: int + match_count: int + shallow_advance: int + shallow_advance_count: int + next_doc: int + next_doc_count: int + score: int + score_count: int + compute_max_score: int + compute_max_score_count: int + count_weight: int + count_weight_count: int + set_min_competitive_score: int + set_min_competitive_score_count: int + + +class QueryProfile(AttrDict[Any]): + """ + :arg breakdown: (required) + :arg 
description: (required) + :arg time_in_nanos: (required) + :arg type: (required) + :arg children: + """ + + breakdown: "QueryBreakdown" + description: str + time_in_nanos: Any + type: str + children: Sequence["QueryProfile"] + + +class RangeAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["RangeBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "RangeBucket"]: + return self.buckets # type: ignore + + +class RangeBucket(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg from: + :arg to: + :arg from_as_string: + :arg to_as_string: + :arg key: The bucket key. Present if the aggregation is _not_ keyed + """ + + doc_count: int + from_: float + to: float + from_as_string: str + to_as_string: str + key: str + + +class RateAggregate(AttrDict[Any]): + """ + :arg value: (required) + :arg value_as_string: + :arg meta: + """ + + value: float + value_as_string: str + meta: Mapping[str, Any] + + +class Retries(AttrDict[Any]): + """ + :arg bulk: (required) + :arg search: (required) + """ + + bulk: int + search: int + + +class ReverseNestedAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class SamplerAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class ScriptedMetricAggregate(AttrDict[Any]): + """ + :arg value: (required) + :arg meta: + """ + + value: Any + meta: Mapping[str, Any] + + +class SearchProfile(AttrDict[Any]): + """ + :arg collector: (required) + :arg query: (required) + :arg rewrite_time: (required) + """ + + collector: Sequence["Collector"] + query: Sequence["QueryProfile"] + rewrite_time: int + + +class ShardFailure(AttrDict[Any]): + """ + :arg reason: (required) + :arg shard: (required) + :arg index: + :arg node: + :arg status: + """ + + reason: "ErrorCause" + shard: int + index: str + node: str + status: str + + +class ShardProfile(AttrDict[Any]): + """ + :arg aggregations: (required) + :arg cluster: (required) + :arg id: (required) + :arg index: (required) + :arg node_id: (required) + :arg searches: (required) + :arg shard_id: (required) + :arg dfs: + :arg fetch: + """ + + aggregations: Sequence["AggregationProfile"] + cluster: str + id: str + index: str + node_id: str + searches: Sequence["SearchProfile"] + shard_id: int + dfs: "DfsProfile" + fetch: "FetchProfile" + + +class ShardStatistics(AttrDict[Any]): + """ + :arg failed: (required) + :arg successful: (required) Indicates how many shards have + successfully run the search. + :arg total: (required) Indicates how many shards the search will run + on overall. 
+ :arg failures: + :arg skipped: + """ + + failed: int + successful: int + total: int + failures: Sequence["ShardFailure"] + skipped: int + + +class SignificantLongTermsAggregate(AttrDict[Any]): + """ + :arg bg_count: + :arg doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + bg_count: int + doc_count: int + buckets: Sequence["SignificantLongTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "SignificantLongTermsBucket"]: + return self.buckets # type: ignore + + +class SignificantLongTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg score: (required) + :arg bg_count: (required) + :arg doc_count: (required) + :arg key_as_string: + """ + + key: int + score: float + bg_count: int + doc_count: int + key_as_string: str + + +class SignificantStringTermsAggregate(AttrDict[Any]): + """ + :arg bg_count: + :arg doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + bg_count: int + doc_count: int + buckets: Sequence["SignificantStringTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "SignificantStringTermsBucket"]: + return self.buckets # type: ignore + + +class SignificantStringTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg score: (required) + :arg bg_count: (required) + :arg doc_count: (required) + """ + + key: str + score: float + bg_count: int + doc_count: int + + +class SimpleValueAggregate(AttrDict[Any]): + """ + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class StandardDeviationBounds(AttrDict[Any]): + """ + :arg upper: (required) + :arg lower: (required) + :arg upper_population: (required) + :arg lower_population: (required) + :arg upper_sampling: (required) + :arg lower_sampling: (required) + """ + + upper: Union[float, None] + lower: Union[float, None] + upper_population: Union[float, None] + lower_population: Union[float, None] + upper_sampling: Union[float, None] + lower_sampling: Union[float, None] + + +class StandardDeviationBoundsAsString(AttrDict[Any]): + """ + :arg upper: (required) + :arg lower: (required) + :arg upper_population: (required) + :arg lower_population: (required) + :arg upper_sampling: (required) + :arg lower_sampling: (required) + """ + + upper: str + lower: str + upper_population: str + lower_population: str + upper_sampling: str + lower_sampling: str + + +class StatsAggregate(AttrDict[Any]): + """ + Statistics aggregation result. `min`, `max` and `avg` are missing if + there were no values to process (`count` is zero). 
+ + :arg count: (required) + :arg min: (required) + :arg max: (required) + :arg avg: (required) + :arg sum: (required) + :arg min_as_string: + :arg max_as_string: + :arg avg_as_string: + :arg sum_as_string: + :arg meta: + """ + + count: int + min: Union[float, None] + max: Union[float, None] + avg: Union[float, None] + sum: float + min_as_string: str + max_as_string: str + avg_as_string: str + sum_as_string: str + meta: Mapping[str, Any] + + +class StatsBucketAggregate(AttrDict[Any]): + """ + :arg count: (required) + :arg min: (required) + :arg max: (required) + :arg avg: (required) + :arg sum: (required) + :arg min_as_string: + :arg max_as_string: + :arg avg_as_string: + :arg sum_as_string: + :arg meta: + """ + + count: int + min: Union[float, None] + max: Union[float, None] + avg: Union[float, None] + sum: float + min_as_string: str + max_as_string: str + avg_as_string: str + sum_as_string: str + meta: Mapping[str, Any] + + +class StringRareTermsAggregate(AttrDict[Any]): + """ + Result of the `rare_terms` aggregation when the field is a string. + + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["StringRareTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "StringRareTermsBucket"]: + return self.buckets # type: ignore + + +class StringRareTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: str + doc_count: int + + +class StringStatsAggregate(AttrDict[Any]): + """ + :arg count: (required) + :arg min_length: (required) + :arg max_length: (required) + :arg avg_length: (required) + :arg entropy: (required) + :arg distribution: + :arg min_length_as_string: + :arg max_length_as_string: + :arg avg_length_as_string: + :arg meta: + """ + + count: int + min_length: Union[int, None] + max_length: Union[int, None] + avg_length: Union[float, None] + entropy: Union[float, None] + distribution: Union[Mapping[str, float], None] + min_length_as_string: str + max_length_as_string: str + avg_length_as_string: str + meta: Mapping[str, Any] + + +class StringTermsAggregate(AttrDict[Any]): + """ + Result of a `terms` aggregation when the field is a string. + + :arg doc_count_error_upper_bound: + :arg sum_other_doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + doc_count_error_upper_bound: int + sum_other_doc_count: int + buckets: Sequence["StringTermsBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "StringTermsBucket"]: + return self.buckets # type: ignore + + +class StringTermsBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + :arg doc_count_error_upper_bound: + """ + + key: Union[int, float, str, bool, None, Any] + doc_count: int + doc_count_error_upper_bound: int + + +class SumAggregate(AttrDict[Any]): + """ + Sum aggregation result. `value` is always present and is zero if there + were no values to process. + + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. 
+ :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class TDigestPercentileRanksAggregate(AttrDict[Any]): + """ + :arg values: (required) + :arg meta: + """ + + values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + meta: Mapping[str, Any] + + +class TDigestPercentilesAggregate(AttrDict[Any]): + """ + :arg values: (required) + :arg meta: + """ + + values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + meta: Mapping[str, Any] + + +class TTestAggregate(AttrDict[Any]): + """ + :arg value: (required) + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class TermSuggest(AttrDict[Any]): + """ + :arg options: (required) + :arg length: (required) + :arg offset: (required) + :arg text: (required) + """ + + options: Sequence["TermSuggestOption"] + length: int + offset: int + text: str + + +class TermSuggestOption(AttrDict[Any]): + """ + :arg text: (required) + :arg score: (required) + :arg freq: (required) + :arg highlighted: + :arg collate_match: + """ + + text: str + score: float + freq: int + highlighted: str + collate_match: bool + + +class TimeSeriesAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["TimeSeriesBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "TimeSeriesBucket"]: + return self.buckets # type: ignore + + +class TimeSeriesBucket(AttrDict[Any]): + """ + :arg key: (required) + :arg doc_count: (required) + """ + + key: Mapping[str, Union[int, float, str, bool, None, Any]] + doc_count: int + + +class TopHitsAggregate(AttrDict[Any]): + """ + :arg hits: (required) + :arg meta: + """ + + hits: "HitsMetadata" + meta: Mapping[str, Any] + + +class TopMetrics(AttrDict[Any]): + """ + :arg sort: (required) + :arg metrics: (required) + """ + + sort: Sequence[Union[Union[int, float, str, bool, None, Any], None]] + metrics: Mapping[str, Union[Union[int, float, str, bool, None, Any], None]] + + +class TopMetricsAggregate(AttrDict[Any]): + """ + :arg top: (required) + :arg meta: + """ + + top: Sequence["TopMetrics"] + meta: Mapping[str, Any] + + +class TotalHits(AttrDict[Any]): + """ + :arg relation: (required) + :arg value: (required) + """ + + relation: Literal["eq", "gte"] + value: int + + +class UnmappedRareTermsAggregate(AttrDict[Any]): + """ + Result of a `rare_terms` aggregation when the field is unmapped. + `buckets` is always empty. + + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence[Any] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, Any]: + return self.buckets # type: ignore + + +class UnmappedSamplerAggregate(AttrDict[Any]): + """ + :arg doc_count: (required) + :arg meta: + """ + + doc_count: int + meta: Mapping[str, Any] + + +class UnmappedSignificantTermsAggregate(AttrDict[Any]): + """ + Result of the `significant_terms` aggregation on an unmapped field. + `buckets` is always empty. 
+ + :arg bg_count: + :arg doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + bg_count: int + doc_count: int + buckets: Sequence[Any] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, Any]: + return self.buckets # type: ignore + + +class UnmappedTermsAggregate(AttrDict[Any]): + """ + Result of a `terms` aggregation when the field is unmapped. `buckets` + is always empty. + + :arg doc_count_error_upper_bound: + :arg sum_other_doc_count: + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + doc_count_error_upper_bound: int + sum_other_doc_count: int + buckets: Sequence[Any] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, Any]: + return self.buckets # type: ignore + + +class ValueCountAggregate(AttrDict[Any]): + """ + Value count aggregation result. `value` is always present. + + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] + + +class VariableWidthHistogramAggregate(AttrDict[Any]): + """ + :arg buckets: (required) the aggregation buckets as a list + :arg meta: + """ + + buckets: Sequence["VariableWidthHistogramBucket"] + meta: Mapping[str, Any] + + @property + def buckets_as_dict(self) -> Mapping[str, "VariableWidthHistogramBucket"]: + return self.buckets # type: ignore + + +class VariableWidthHistogramBucket(AttrDict[Any]): + """ + :arg min: (required) + :arg key: (required) + :arg max: (required) + :arg doc_count: (required) + :arg min_as_string: + :arg key_as_string: + :arg max_as_string: + """ + + min: float + key: float + max: float + doc_count: int + min_as_string: str + key_as_string: str + max_as_string: str + + +class WeightedAvgAggregate(AttrDict[Any]): + """ + Weighted average aggregation result. `value` is missing if the weight + was set to zero. + + :arg value: (required) The metric value. A missing value generally + means that there was no data to aggregate, unless specified + otherwise. + :arg value_as_string: + :arg meta: + """ + + value: Union[float, None] + value_as_string: str + meta: Mapping[str, Any] diff --git a/elasticsearch/dsl/update_by_query.py b/elasticsearch/dsl/update_by_query.py new file mode 100644 index 000000000..fdff22bc8 --- /dev/null +++ b/elasticsearch/dsl/update_by_query.py @@ -0,0 +1,19 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
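The aggregate classes defined in elasticsearch/dsl/types.py above describe the
shape of the objects found under ``response.aggregations`` when a search that
declares aggregations is executed through the DSL. As a rough, illustrative
sketch only (the index name, aggregation name and field are hypothetical, and
a configured default connection is assumed), a terms aggregation result could
be consumed like this::

    from elasticsearch.dsl import Search

    s = Search(index="my-index")                     # hypothetical index
    s.aggs.bucket("per_tag", "terms", field="tags")  # hypothetical agg/field
    response = s.execute()

    # `per_tag` follows the terms-aggregate shape defined above: `buckets`
    # is a list of bucket objects exposing `key` and `doc_count`.
    for bucket in response.aggregations.per_tag.buckets:
        print(bucket.key, bucket.doc_count)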
+
+from ._async.update_by_query import AsyncUpdateByQuery  # noqa: F401
+from ._sync.update_by_query import UpdateByQuery  # noqa: F401
diff --git a/elasticsearch/dsl/update_by_query_base.py b/elasticsearch/dsl/update_by_query_base.py
new file mode 100644
index 000000000..e4490ddf6
--- /dev/null
+++ b/elasticsearch/dsl/update_by_query_base.py
@@ -0,0 +1,149 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, Dict, Type
+
+from typing_extensions import Self
+
+from .query import Bool, Q
+from .response import UpdateByQueryResponse
+from .search_base import ProxyDescriptor, QueryProxy, Request
+from .utils import _R, recursive_to_dict
+
+
+class UpdateByQueryBase(Request[_R]):
+    query = ProxyDescriptor[Self]("query")
+
+    def __init__(self, **kwargs: Any):
+        """
+        Update by query request to Elasticsearch.
+
+        :arg using: `Elasticsearch` instance to use
+        :arg index: limit the search to this index
+        :arg doc_type: only query this type.
+
+        All the parameters supplied (or omitted) at creation time can be later
+        overridden by methods (`using`, `index` and `doc_type` respectively).
+
+        """
+        super().__init__(**kwargs)
+        self._response_class = UpdateByQueryResponse[_R]
+        self._script: Dict[str, Any] = {}
+        self._query_proxy = QueryProxy(self, "query")
+
+    def filter(self, *args: Any, **kwargs: Any) -> Self:
+        return self.query(Bool(filter=[Q(*args, **kwargs)]))
+
+    def exclude(self, *args: Any, **kwargs: Any) -> Self:
+        return self.query(Bool(filter=[~Q(*args, **kwargs)]))
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, Any]) -> Self:
+        """
+        Construct a new `UpdateByQuery` instance from a raw dict containing the search
+        body. Useful when migrating from raw dictionaries.
+
+        Example::
+
+            ubq = UpdateByQuery.from_dict({
+                "query": {
+                    "bool": {
+                        "must": [...]
+                    }
+                },
+                "script": {...}
+            })
+            ubq = ubq.filter('term', published=True)
+        """
+        u = cls()
+        u.update_from_dict(d)
+        return u
+
+    def _clone(self) -> Self:
+        """
+        Return a clone of the current search request. Performs a shallow copy
+        of all the underlying objects. Used internally by most state modifying
+        APIs.
+        """
+        ubq = super()._clone()
+
+        ubq._response_class = self._response_class
+        ubq._script = self._script.copy()
+        ubq.query._proxied = self.query._proxied
+        return ubq
+
+    def response_class(self, cls: Type[UpdateByQueryResponse[_R]]) -> Self:
+        """
+        Override the default wrapper used for the response.
+        """
+        ubq = self._clone()
+        ubq._response_class = cls
+        return ubq
+
+    def update_from_dict(self, d: Dict[str, Any]) -> Self:
+        """
+        Apply options from a serialized body to the current instance. Modifies
+        the object in-place. Used mostly by ``from_dict``.
+ """ + d = d.copy() + if "query" in d: + self.query._proxied = Q(d.pop("query")) + if "script" in d: + self._script = d.pop("script") + self._extra.update(d) + return self + + def script(self, **kwargs: Any) -> Self: + """ + Define update action to take: + https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-using.html + for more details. + + Note: the API only accepts a single script, so + calling the script multiple times will overwrite. + + Example:: + + ubq = Search() + ubq = ubq.script(source="ctx._source.likes++"") + ubq = ubq.script(source="ctx._source.likes += params.f"", + lang="expression", + params={'f': 3}) + """ + ubq = self._clone() + if ubq._script: + ubq._script = {} + ubq._script.update(kwargs) + return ubq + + def to_dict(self, **kwargs: Any) -> Dict[str, Any]: + """ + Serialize the search into the dictionary that will be sent over as the + request'ubq body. + + All additional keyword arguments will be included into the dictionary. + """ + d = {} + if self.query: + d["query"] = self.query.to_dict() + + if self._script: + d["script"] = self._script + + d.update(recursive_to_dict(self._extra)) + d.update(recursive_to_dict(kwargs)) + return d diff --git a/elasticsearch/dsl/utils.py b/elasticsearch/dsl/utils.py new file mode 100644 index 000000000..b425f79a4 --- /dev/null +++ b/elasticsearch/dsl/utils.py @@ -0,0 +1,686 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +import collections.abc +from copy import copy +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Dict, + Generic, + Iterable, + Iterator, + List, + Mapping, + Optional, + Tuple, + Type, + Union, + cast, +) + +from elastic_transport.client_utils import DEFAULT +from typing_extensions import Self, TypeAlias, TypeVar + +from .exceptions import UnknownDslObject, ValidationException + +if TYPE_CHECKING: + from elastic_transport import ObjectApiResponse + from elasticsearch import AsyncElasticsearch, Elasticsearch + + from .document_base import DocumentOptions + from .field import Field + from .index_base import IndexBase + from .response import Hit # noqa: F401 + from .types import Hit as HitBaseType + +UsingType: TypeAlias = Union[str, "Elasticsearch"] +AsyncUsingType: TypeAlias = Union[str, "AsyncElasticsearch"] +AnyUsingType: TypeAlias = Union[str, "Elasticsearch", "AsyncElasticsearch"] + +_ValT = TypeVar("_ValT") # used by AttrDict +_R = TypeVar("_R", default="Hit") # used by Search and Response classes + +SKIP_VALUES = ("", None) +EXPAND__TO_DOT = True + +DOC_META_FIELDS = frozenset( + ( + "id", + "routing", + ) +) + +META_FIELDS = frozenset( + ( + # Elasticsearch metadata fields, except 'type' + "index", + "using", + "score", + "version", + "seq_no", + "primary_term", + ) +).union(DOC_META_FIELDS) + + +def _wrap(val: Any, obj_wrapper: Optional[Callable[[Any], Any]] = None) -> Any: + if isinstance(val, dict): + return AttrDict(val) if obj_wrapper is None else obj_wrapper(val) + if isinstance(val, list): + return AttrList(val) + return val + + +def _recursive_to_dict(value: Any) -> Any: + if hasattr(value, "to_dict"): + return value.to_dict() + elif isinstance(value, dict) or isinstance(value, AttrDict): + return {k: _recursive_to_dict(v) for k, v in value.items()} + elif isinstance(value, list) or isinstance(value, AttrList): + return [recursive_to_dict(elem) for elem in value] + else: + return value + + +class AttrList(Generic[_ValT]): + def __init__( + self, l: List[_ValT], obj_wrapper: Optional[Callable[[_ValT], Any]] = None + ): + # make iterables into lists + if not isinstance(l, list): + l = list(l) + self._l_ = l + self._obj_wrapper = obj_wrapper + + def __repr__(self) -> str: + return repr(self._l_) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, AttrList): + return other._l_ == self._l_ + # make sure we still equal to a dict with the same data + return bool(other == self._l_) + + def __ne__(self, other: Any) -> bool: + return not self == other + + def __getitem__(self, k: Union[int, slice]) -> Any: + l = self._l_[k] + if isinstance(k, slice): + return AttrList[_ValT](l, obj_wrapper=self._obj_wrapper) # type: ignore[arg-type] + return _wrap(l, self._obj_wrapper) + + def __setitem__(self, k: int, value: _ValT) -> None: + self._l_[k] = value + + def __iter__(self) -> Iterator[Any]: + return map(lambda i: _wrap(i, self._obj_wrapper), self._l_) + + def __len__(self) -> int: + return len(self._l_) + + def __nonzero__(self) -> bool: + return bool(self._l_) + + __bool__ = __nonzero__ + + def __getattr__(self, name: str) -> Any: + return getattr(self._l_, name) + + def __getstate__(self) -> Tuple[List[_ValT], Optional[Callable[[_ValT], Any]]]: + return self._l_, self._obj_wrapper + + def __setstate__( + self, state: Tuple[List[_ValT], Optional[Callable[[_ValT], Any]]] + ) -> None: + self._l_, self._obj_wrapper = state + + def to_list(self) -> List[_ValT]: + return self._l_ + + +class AttrDict(Generic[_ValT]): + """ + Helper class to provide 
attribute-like access (read and write) to
+    dictionaries. Used to provide a convenient way to access both results and
+    nested dsl dicts.
+    """
+
+    _d_: Dict[str, _ValT]
+    RESERVED: Dict[str, str] = {"from_": "from"}
+
+    def __init__(self, d: Dict[str, _ValT]):
+        # assign the inner dict manually to prevent __setattr__ from firing
+        super().__setattr__("_d_", d)
+
+    def __contains__(self, key: object) -> bool:
+        return key in self._d_
+
+    def __nonzero__(self) -> bool:
+        return bool(self._d_)
+
+    __bool__ = __nonzero__
+
+    def __dir__(self) -> List[str]:
+        # introspection for auto-complete in IPython etc.
+        return list(self._d_.keys())
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, AttrDict):
+            return other._d_ == self._d_
+        # make sure we still equal to a dict with the same data
+        return bool(other == self._d_)
+
+    def __ne__(self, other: Any) -> bool:
+        return not self == other
+
+    def __repr__(self) -> str:
+        r = repr(self._d_)
+        if len(r) > 60:
+            r = r[:60] + "...}"
+        return r
+
+    def __getstate__(self) -> Tuple[Dict[str, _ValT]]:
+        return (self._d_,)
+
+    def __setstate__(self, state: Tuple[Dict[str, _ValT]]) -> None:
+        super().__setattr__("_d_", state[0])
+
+    def __getattr__(self, attr_name: str) -> Any:
+        try:
+            return self.__getitem__(attr_name)
+        except KeyError:
+            raise AttributeError(
+                f"{self.__class__.__name__!r} object has no attribute {attr_name!r}"
+            )
+
+    def __delattr__(self, attr_name: str) -> None:
+        try:
+            del self._d_[self.RESERVED.get(attr_name, attr_name)]
+        except KeyError:
+            raise AttributeError(
+                f"{self.__class__.__name__!r} object has no attribute {attr_name!r}"
+            )
+
+    def __getitem__(self, key: str) -> Any:
+        return _wrap(self._d_[self.RESERVED.get(key, key)])
+
+    def __setitem__(self, key: str, value: _ValT) -> None:
+        self._d_[self.RESERVED.get(key, key)] = value
+
+    def __delitem__(self, key: str) -> None:
+        del self._d_[self.RESERVED.get(key, key)]
+
+    def __setattr__(self, name: str, value: _ValT) -> None:
+        # the __orig_class__ attribute has to be treated as an exception, as
+        # it is added to an object when it is instantiated with type arguments
+        if (
+            name in self._d_ or not hasattr(self.__class__, name)
+        ) and name != "__orig_class__":
+            self._d_[self.RESERVED.get(name, name)] = value
+        else:
+            # there is an attribute on the class (could be a property, ...) - don't add it as a field
+            super().__setattr__(name, value)
+
+    def __iter__(self) -> Iterator[str]:
+        return iter(self._d_)
+
+    def to_dict(self, recursive: bool = False) -> Dict[str, _ValT]:
+        return cast(
+            Dict[str, _ValT], _recursive_to_dict(self._d_) if recursive else self._d_
+        )
+
+    def keys(self) -> Iterable[str]:
+        return self._d_.keys()
+
+    def items(self) -> Iterable[Tuple[str, _ValT]]:
+        return self._d_.items()
+
+
+class DslMeta(type):
+    """
+    Base metaclass for DslBase subclasses that builds a registry of all classes
+    for a given DslBase subclass (== all the query types for the Query subclass
+    of DslBase).
+
+    It then uses the information from that registry (as well as the `name` and
+    `shortcut` attributes from the base class) to construct any subclass based
+    on its name.
+
+    For typical use see `QueryMeta` and `Query` in `elasticsearch.dsl.query`.
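+
+    As an illustrative (and hypothetical) example, a subclass declaring
+    ``name = "my_query"`` under the `Query` base (whose `_type_name` is
+    ``"query"``) is registered in `Query._classes` and can be resolved with
+    `Query.get_dsl_class("my_query")`, which is how shortcuts such as `Q`
+    construct objects from plain dicts.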
+ """ + + name: str + _classes: Dict[str, type] + _type_name: str + _types: ClassVar[Dict[str, Type["DslBase"]]] = {} + + def __init__(cls, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]): + super().__init__(name, bases, attrs) + # skip for DslBase + if not hasattr(cls, "_type_shortcut"): + return + if not cls.name: + # abstract base class, register it's shortcut + cls._types[cls._type_name] = cls._type_shortcut + # and create a registry for subclasses + if not hasattr(cls, "_classes"): + cls._classes = {} + elif cls.name not in cls._classes: + # normal class, register it + cls._classes[cls.name] = cls + + @classmethod + def get_dsl_type(cls, name: str) -> Type["DslBase"]: + try: + return cls._types[name] + except KeyError: + raise UnknownDslObject(f"DSL type {name} does not exist.") + + +class DslBase(metaclass=DslMeta): + """ + Base class for all DSL objects - queries, filters, aggregations etc. Wraps + a dictionary representing the object's json. + + Provides several feature: + - attribute access to the wrapped dictionary (.field instead of ['field']) + - _clone method returning a copy of self + - to_dict method to serialize into dict (to be sent via elasticsearch-py) + - basic logical operators (&, | and ~) using a Bool(Filter|Query) TODO: + move into a class specific for Query/Filter + - respects the definition of the class and (de)serializes it's + attributes based on the `_param_defs` definition (for example turning + all values in the `must` attribute into Query objects) + """ + + _param_defs: ClassVar[Dict[str, Dict[str, Union[str, bool]]]] = {} + + @classmethod + def get_dsl_class( + cls: Type[Self], name: str, default: Optional[str] = None + ) -> Type[Self]: + try: + return cls._classes[name] + except KeyError: + if default is not None: + return cls._classes[default] + raise UnknownDslObject( + f"DSL class `{name}` does not exist in {cls._type_name}." + ) + + def __init__(self, _expand__to_dot: Optional[bool] = None, **params: Any) -> None: + if _expand__to_dot is None: + _expand__to_dot = EXPAND__TO_DOT + self._params: Dict[str, Any] = {} + for pname, pvalue in params.items(): + if pvalue == DEFAULT: + continue + # expand "__" to dots + if "__" in pname and _expand__to_dot: + pname = pname.replace("__", ".") + # convert instrumented fields to string + if type(pvalue).__name__ == "InstrumentedField": + pvalue = str(pvalue) + self._setattr(pname, pvalue) + + def _repr_params(self) -> str: + """Produce a repr of all our parameters to be used in __repr__.""" + return ", ".join( + f"{n.replace('.', '__')}={v!r}" + for (n, v) in sorted(self._params.items()) + # make sure we don't include empty typed params + if "type" not in self._param_defs.get(n, {}) or v + ) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self._repr_params()})" + + def __eq__(self, other: Any) -> bool: + return isinstance(other, self.__class__) and other.to_dict() == self.to_dict() + + def __ne__(self, other: Any) -> bool: + return not self == other + + def __setattr__(self, name: str, value: Any) -> None: + if name.startswith("_"): + return super().__setattr__(name, value) + return self._setattr(name, value) + + def _setattr(self, name: str, value: Any) -> None: + # if this attribute has special type assigned to it... 
+ name = AttrDict.RESERVED.get(name, name) + if name in self._param_defs: + pinfo = self._param_defs[name] + + if "type" in pinfo: + # get the shortcut used to construct this type (query.Q, aggs.A, etc) + shortcut = self.__class__.get_dsl_type(str(pinfo["type"])) + + # list of dict(name -> DslBase) + if pinfo.get("multi") and pinfo.get("hash"): + if not isinstance(value, (tuple, list)): + value = (value,) + value = list( + {k: shortcut(v) for (k, v) in obj.items()} for obj in value + ) + elif pinfo.get("multi"): + if not isinstance(value, (tuple, list)): + value = (value,) + value = list(map(shortcut, value)) + + # dict(name -> DslBase), make sure we pickup all the objs + elif pinfo.get("hash"): + value = {k: shortcut(v) for (k, v) in value.items()} + + # single value object, just convert + else: + value = shortcut(value) + self._params[name] = value + + def __getattr__(self, name: str) -> Any: + if name.startswith("_"): + raise AttributeError( + f"{self.__class__.__name__!r} object has no attribute {name!r}" + ) + + value = None + try: + value = self._params[name] + except KeyError: + # compound types should never throw AttributeError and return empty + # container instead + if name in self._param_defs: + pinfo = self._param_defs[name] + if pinfo.get("multi"): + value = self._params.setdefault(name, []) + elif pinfo.get("hash"): + value = self._params.setdefault(name, {}) + if value is None: + raise AttributeError( + f"{self.__class__.__name__!r} object has no attribute {name!r}" + ) + + # wrap nested dicts in AttrDict for convenient access + if isinstance(value, dict): + return AttrDict(value) + return value + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize the DSL object to plain dict + """ + d = {} + for pname, value in self._params.items(): + pinfo = self._param_defs.get(pname) + + # typed param + if pinfo and "type" in pinfo: + # don't serialize empty lists and dicts for typed fields + if value in ({}, []): + continue + + # list of dict(name -> DslBase) + if pinfo.get("multi") and pinfo.get("hash"): + value = list( + {k: v.to_dict() for k, v in obj.items()} for obj in value + ) + + # multi-values are serialized as list of dicts + elif pinfo.get("multi"): + value = list(map(lambda x: x.to_dict(), value)) + + # squash all the hash values into one dict + elif pinfo.get("hash"): + value = {k: v.to_dict() for k, v in value.items()} + + # serialize single values + else: + value = value.to_dict() + + # serialize anything with to_dict method + elif hasattr(value, "to_dict"): + value = value.to_dict() + + d[pname] = value + return {self.name: d} + + def _clone(self) -> Self: + c = self.__class__() + for attr in self._params: + c._params[attr] = copy(self._params[attr]) + return c + + +if TYPE_CHECKING: + HitMetaBase = HitBaseType +else: + HitMetaBase = AttrDict[Any] + + +class HitMeta(HitMetaBase): + inner_hits: Mapping[str, Any] + + def __init__( + self, + document: Dict[str, Any], + exclude: Tuple[str, ...] 
= ("_source", "_fields"), + ): + d = { + k[1:] if k.startswith("_") else k: v + for (k, v) in document.items() + if k not in exclude + } + if "type" in d: + # make sure we are consistent everywhere in python + d["doc_type"] = d.pop("type") + super().__init__(d) + + +class ObjectBase(AttrDict[Any]): + _doc_type: "DocumentOptions" + _index: "IndexBase" + meta: HitMeta + + def __init__(self, meta: Optional[Dict[str, Any]] = None, **kwargs: Any): + meta = meta or {} + for k in list(kwargs): + if k.startswith("_") and k[1:] in META_FIELDS: + meta[k] = kwargs.pop(k) + + super(AttrDict, self).__setattr__("meta", HitMeta(meta)) + + # process field defaults + if hasattr(self, "_defaults"): + for name in self._defaults: + if name not in kwargs: + value = self._defaults[name] + if callable(value): + value = value() + kwargs[name] = value + + super().__init__(kwargs) + + @classmethod + def __list_fields(cls) -> Iterator[Tuple[str, "Field", bool]]: + """ + Get all the fields defined for our class, if we have an Index, try + looking at the index mappings as well, mark the fields from Index as + optional. + """ + for name in cls._doc_type.mapping: + field = cls._doc_type.mapping[name] + yield name, field, False + + if hasattr(cls.__class__, "_index"): + if not cls._index._mapping: + return + for name in cls._index._mapping: + # don't return fields that are in _doc_type + if name in cls._doc_type.mapping: + continue + field = cls._index._mapping[name] + yield name, field, True + + @classmethod + def __get_field(cls, name: str) -> Optional["Field"]: + try: + return cls._doc_type.mapping[name] + except KeyError: + # fallback to fields on the Index + if hasattr(cls, "_index") and cls._index._mapping: + try: + return cls._index._mapping[name] + except KeyError: + pass + return None + + @classmethod + def from_es(cls, hit: Union[Dict[str, Any], "ObjectApiResponse[Any]"]) -> Self: + meta = hit.copy() + data = meta.pop("_source", {}) + doc = cls(meta=meta) + doc._from_dict(data) + return doc + + def _from_dict(self, data: Dict[str, Any]) -> None: + for k, v in data.items(): + f = self.__get_field(k) + if f and f._coerce: + v = f.deserialize(v) + setattr(self, k, v) + + def __getstate__(self) -> Tuple[Dict[str, Any], Dict[str, Any]]: # type: ignore[override] + return self.to_dict(), self.meta._d_ + + def __setstate__(self, state: Tuple[Dict[str, Any], Dict[str, Any]]) -> None: # type: ignore[override] + data, meta = state + super(AttrDict, self).__setattr__("_d_", {}) + super(AttrDict, self).__setattr__("meta", HitMeta(meta)) + self._from_dict(data) + + def __getattr__(self, name: str) -> Any: + try: + return super().__getattr__(name) + except AttributeError: + f = self.__get_field(name) + if f is not None and hasattr(f, "empty"): + value = f.empty() + if value not in SKIP_VALUES: + setattr(self, name, value) + value = getattr(self, name) + return value + raise + + def __setattr__(self, name: str, value: Any) -> None: + if name in self.__class__._doc_type.mapping: + self._d_[name] = value + else: + super().__setattr__(name, value) + + def to_dict(self, skip_empty: bool = True) -> Dict[str, Any]: + out = {} + for k, v in self._d_.items(): + # if this is a mapped field, + f = self.__get_field(k) + if f and f._coerce: + v = f.serialize(v) + + # if someone assigned AttrList, unwrap it + if isinstance(v, AttrList): + v = v._l_ + + if skip_empty: + # don't serialize empty values + # careful not to include numeric zeros + if v in ([], {}, None): + continue + + out[k] = v + return out + + def clean_fields(self, validate: 
bool = True) -> None: + errors: Dict[str, List[ValidationException]] = {} + for name, field, optional in self.__list_fields(): + data = self._d_.get(name, None) + if data is None and optional: + continue + try: + # save the cleaned value + data = field.clean(data) + except ValidationException as e: + errors.setdefault(name, []).append(e) + + if name in self._d_ or data not in ([], {}, None): + self._d_[name] = cast(Any, data) + + if validate and errors: + raise ValidationException(errors) + + def clean(self) -> None: + pass + + def full_clean(self) -> None: + self.clean_fields(validate=False) + self.clean() + self.clean_fields(validate=True) + + +def merge( + data: Union[Dict[str, Any], AttrDict[Any]], + new_data: Union[Dict[str, Any], AttrDict[Any]], + raise_on_conflict: bool = False, +) -> None: + if not ( + isinstance(data, (AttrDict, collections.abc.Mapping)) + and isinstance(new_data, (AttrDict, collections.abc.Mapping)) + ): + raise ValueError( + f"You can only merge two dicts! Got {data!r} and {new_data!r} instead." + ) + + for key, value in new_data.items(): + if ( + key in data + and isinstance(data[key], (AttrDict, collections.abc.Mapping)) + and isinstance(value, (AttrDict, collections.abc.Mapping)) + ): + merge(data[key], value, raise_on_conflict) # type: ignore + elif key in data and data[key] != value and raise_on_conflict: + raise ValueError(f"Incompatible data for key {key!r}, cannot be merged.") + else: + data[key] = value + + +def recursive_to_dict(data: Any) -> Any: + """Recursively transform objects that potentially have .to_dict() + into dictionary literals by traversing AttrList, AttrDict, list, + tuple, and Mapping types. + """ + if isinstance(data, AttrList): + data = list(data._l_) + elif hasattr(data, "to_dict"): + data = data.to_dict() + if isinstance(data, (list, tuple)): + return type(data)(recursive_to_dict(inner) for inner in data) + elif isinstance(data, dict): + return {key: recursive_to_dict(val) for key, val in data.items()} + return data diff --git a/elasticsearch/dsl/wrappers.py b/elasticsearch/dsl/wrappers.py new file mode 100644 index 000000000..ecd2e1363 --- /dev/null +++ b/elasticsearch/dsl/wrappers.py @@ -0,0 +1,119 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
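+# A rough sketch of the intended semantics of the `Range` wrapper defined
+# below (the values are illustrative):
+#
+#     r = Range(gt=1, lte=10)
+#     5 in r       # True: 5 > 1 and 5 <= 10
+#     12 in r      # False: 12 > 10
+#     r.upper      # (10, True)  -- inclusive upper bound
+#     r.lower      # (1, False)  -- exclusive lower bound
+#     Range({"gte": 2})  # a single positional dict is also accepted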
+ +import operator +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, + Dict, + Literal, + Mapping, + Optional, + Tuple, + TypeVar, + Union, + cast, +) + +if TYPE_CHECKING: + from _operator import _SupportsComparison + +from typing_extensions import TypeAlias + +from .utils import AttrDict + +ComparisonOperators: TypeAlias = Literal["lt", "lte", "gt", "gte"] +RangeValT = TypeVar("RangeValT", bound="_SupportsComparison") + +__all__ = ["Range"] + + +class Range(AttrDict[RangeValT]): + OPS: ClassVar[ + Mapping[ + ComparisonOperators, + Callable[["_SupportsComparison", "_SupportsComparison"], bool], + ] + ] = { + "lt": operator.lt, + "lte": operator.le, + "gt": operator.gt, + "gte": operator.ge, + } + + def __init__( + self, + d: Optional[Dict[str, RangeValT]] = None, + /, + **kwargs: RangeValT, + ): + if d is not None and (kwargs or not isinstance(d, dict)): + raise ValueError( + "Range accepts a single dictionary or a set of keyword arguments." + ) + + if d is None: + data = kwargs + else: + data = d + + for k in data: + if k not in self.OPS: + raise ValueError(f"Range received an unknown operator {k!r}") + + if "gt" in data and "gte" in data: + raise ValueError("You cannot specify both gt and gte for Range.") + + if "lt" in data and "lte" in data: + raise ValueError("You cannot specify both lt and lte for Range.") + + super().__init__(data) + + def __repr__(self) -> str: + return "Range(%s)" % ", ".join("%s=%r" % op for op in self._d_.items()) + + def __contains__(self, item: object) -> bool: + if isinstance(item, str): + return super().__contains__(item) + + item_supports_comp = any(hasattr(item, f"__{op}__") for op in self.OPS) + if not item_supports_comp: + return False + + for op in self.OPS: + if op in self._d_ and not self.OPS[op]( + cast("_SupportsComparison", item), self._d_[op] + ): + return False + return True + + @property + def upper(self) -> Union[Tuple[RangeValT, bool], Tuple[None, Literal[False]]]: + if "lt" in self._d_: + return self._d_["lt"], False + if "lte" in self._d_: + return self._d_["lte"], True + return None, False + + @property + def lower(self) -> Union[Tuple[RangeValT, bool], Tuple[None, Literal[False]]]: + if "gt" in self._d_: + return self._d_["gt"], False + if "gte" in self._d_: + return self._d_["gte"], True + return None, False diff --git a/examples/dsl/README.rst b/examples/dsl/README.rst new file mode 100644 index 000000000..87bfe0ec0 --- /dev/null +++ b/examples/dsl/README.rst @@ -0,0 +1,47 @@ +Elasticsearch DSL Examples +========================== + +In this directory you can see several complete examples demonstrating key +concepts and patterns exposed by ``elasticsearch-dsl``. + +``alias_migration.py`` +---------------------- + +The alias migration example shows a useful pattern where we use versioned +indices (``test-blog-0``, ``test-blog-1``, ...) to manage schema changes and +hides that behind an alias so that the application doesn't have to be aware of +the versions and just refer to the ``test-blog`` alias for both read and write +operations. + +For simplicity we use a timestamp as version in the index name. + +``parent_child.py`` +------------------- + +More complex example highlighting the possible relationships available in +elasticsearch - `parent/child +`_ and +`nested +`_. + +``composite_agg.py`` +-------------------- + +A helper function using the `composite aggregation +`_ +to paginate over aggregation results. 
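+
+As a rough sketch of how the helper is meant to be driven (based on the
+asynchronous variant in ``async/composite_agg.py``; the ``git`` index and the
+field names come from the library's test dataset)::
+
+    from elasticsearch.dsl import Search, aggs
+
+    for bucket in scan_aggs(
+        Search(index="git"),
+        [{"files": aggs.Terms(field="files")}],
+        {"first_seen": aggs.Min(field="committed_date")},
+    ):
+        print(bucket.key.files, bucket.doc_count)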
+ +``percolate.py`` +---------------- + +A ``BlogPost`` document with automatic classification using the `percolator +`_ +functionality. + +``completion.py`` +----------------- + +As example using `completion suggester +`_ +to auto complete people's names. + diff --git a/examples/dsl/alias_migration.py b/examples/dsl/alias_migration.py new file mode 100644 index 000000000..24355aded --- /dev/null +++ b/examples/dsl/alias_migration.py @@ -0,0 +1,161 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Simple example with a single Document demonstrating how schema can be managed, +including upgrading with reindexing. + +Key concepts: + + * setup() function to first initialize the schema (as index template) in + elasticsearch. Can be called any time (recommended with every deploy of + your app). + + * migrate() function to be called any time when the schema changes - it + will create a new index (by incrementing the version) and update the alias. + By default it will also (before flipping the alias) move the data from the + previous index to the new one. + + * BlogPost._matches() class method is required for this code to work since + otherwise BlogPost will not be used to deserialize the documents as those + will have index set to the concrete index whereas the class refers to the + alias. +""" +import os +from datetime import datetime +from fnmatch import fnmatch +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +from elasticsearch.dsl import Document, Keyword, connections, mapped_field + +ALIAS = "test-blog" +PATTERN = ALIAS + "-*" +PRIORITY = 100 + + +class BlogPost(Document): + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: int + + title: str + tags: List[str] = mapped_field(Keyword()) + content: str + published: Optional[datetime] = mapped_field(default=None) + + def is_published(self) -> bool: + return bool(self.published and datetime.now() > self.published) + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + # override _matches to match indices in a pattern instead of just ALIAS + # hit is the raw dict as returned by elasticsearch + return fnmatch(hit["_index"], PATTERN) + + class Index: + # we will use an alias instead of the index + name = ALIAS + # set settings and possibly other attributes of the index like + # analyzers + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +def setup() -> None: + """ + Create the index template in elasticsearch specifying the mappings and any + settings to be used. This can be run at any time, ideally at every new code + deploy. 
+ """ + # create an index template + index_template = BlogPost._index.as_composable_template( + ALIAS, PATTERN, priority=PRIORITY + ) + # upload the template into elasticsearch + # potentially overriding the one already there + index_template.save() + + # create the first index if it doesn't exist + if not BlogPost._index.exists(): + migrate(move_data=False) + + +def migrate(move_data: bool = True, update_alias: bool = True) -> None: + """ + Upgrade function that creates a new index for the data. Optionally it also can + (and by default will) reindex previous copy of the data into the new index + (specify ``move_data=False`` to skip this step) and update the alias to + point to the latest index (set ``update_alias=False`` to skip). + + Note that while this function is running the application can still perform + any and all searches without any loss of functionality. It should, however, + not perform any writes at this time as those might be lost. + """ + # construct a new index name by appending current timestamp + next_index = PATTERN.replace("*", datetime.now().strftime("%Y%m%d%H%M%S%f")) + + # get the low level connection + es = connections.get_connection() + + # create new index, it will use the settings from the template + es.indices.create(index=next_index) + + if move_data: + # move data from current alias to the new index + es.options(request_timeout=3600).reindex( + body={"source": {"index": ALIAS}, "dest": {"index": next_index}} + ) + # refresh the index to make the changes visible + es.indices.refresh(index=next_index) + + if update_alias: + # repoint the alias to point to the newly created index + es.indices.update_aliases( + body={ + "actions": [ + {"remove": {"alias": ALIAS, "index": PATTERN}}, + {"add": {"alias": ALIAS, "index": next_index}}, + ] + } + ) + + +def main() -> None: + # initiate the default connection to elasticsearch + connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create the empty index + setup() + + # create a new document + bp = BlogPost( + _id=0, + title="Hello World!", + tags=["testing", "dummy"], + content=open(__file__).read(), + ) + bp.save(refresh=True) + + # create new index + migrate() + + # close the connection + connections.get_connection().close() + + +if __name__ == "__main__": + main() diff --git a/examples/dsl/async/alias_migration.py b/examples/dsl/async/alias_migration.py new file mode 100644 index 000000000..94bdd63ce --- /dev/null +++ b/examples/dsl/async/alias_migration.py @@ -0,0 +1,162 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Simple example with a single Document demonstrating how schema can be managed, +including upgrading with reindexing. + +Key concepts: + + * setup() function to first initialize the schema (as index template) in + elasticsearch. 
Can be called any time (recommended with every deploy of + your app). + + * migrate() function to be called any time when the schema changes - it + will create a new index (by incrementing the version) and update the alias. + By default it will also (before flipping the alias) move the data from the + previous index to the new one. + + * BlogPost._matches() class method is required for this code to work since + otherwise BlogPost will not be used to deserialize the documents as those + will have index set to the concrete index whereas the class refers to the + alias. +""" +import asyncio +import os +from datetime import datetime +from fnmatch import fnmatch +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +from elasticsearch.dsl import AsyncDocument, Keyword, async_connections, mapped_field + +ALIAS = "test-blog" +PATTERN = ALIAS + "-*" +PRIORITY = 100 + + +class BlogPost(AsyncDocument): + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: int + + title: str + tags: List[str] = mapped_field(Keyword()) + content: str + published: Optional[datetime] = mapped_field(default=None) + + def is_published(self) -> bool: + return bool(self.published and datetime.now() > self.published) + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + # override _matches to match indices in a pattern instead of just ALIAS + # hit is the raw dict as returned by elasticsearch + return fnmatch(hit["_index"], PATTERN) + + class Index: + # we will use an alias instead of the index + name = ALIAS + # set settings and possibly other attributes of the index like + # analyzers + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +async def setup() -> None: + """ + Create the index template in elasticsearch specifying the mappings and any + settings to be used. This can be run at any time, ideally at every new code + deploy. + """ + # create an index template + index_template = BlogPost._index.as_composable_template( + ALIAS, PATTERN, priority=PRIORITY + ) + # upload the template into elasticsearch + # potentially overriding the one already there + await index_template.save() + + # create the first index if it doesn't exist + if not await BlogPost._index.exists(): + await migrate(move_data=False) + + +async def migrate(move_data: bool = True, update_alias: bool = True) -> None: + """ + Upgrade function that creates a new index for the data. Optionally it also can + (and by default will) reindex previous copy of the data into the new index + (specify ``move_data=False`` to skip this step) and update the alias to + point to the latest index (set ``update_alias=False`` to skip). + + Note that while this function is running the application can still perform + any and all searches without any loss of functionality. It should, however, + not perform any writes at this time as those might be lost. 
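+
+    For example, a run of this function might create an index named
+    ``test-blog-20250110125701000000`` (a timestamp-based name matching
+    ``PATTERN``; the exact suffix depends on when it runs) and, unless
+    ``update_alias=False`` is passed, repoint the ``test-blog`` alias to it.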
+ """ + # construct a new index name by appending current timestamp + next_index = PATTERN.replace("*", datetime.now().strftime("%Y%m%d%H%M%S%f")) + + # get the low level connection + es = async_connections.get_connection() + + # create new index, it will use the settings from the template + await es.indices.create(index=next_index) + + if move_data: + # move data from current alias to the new index + await es.options(request_timeout=3600).reindex( + body={"source": {"index": ALIAS}, "dest": {"index": next_index}} + ) + # refresh the index to make the changes visible + await es.indices.refresh(index=next_index) + + if update_alias: + # repoint the alias to point to the newly created index + await es.indices.update_aliases( + body={ + "actions": [ + {"remove": {"alias": ALIAS, "index": PATTERN}}, + {"add": {"alias": ALIAS, "index": next_index}}, + ] + } + ) + + +async def main() -> None: + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create the empty index + await setup() + + # create a new document + bp = BlogPost( + _id=0, + title="Hello World!", + tags=["testing", "dummy"], + content=open(__file__).read(), + ) + await bp.save(refresh=True) + + # create new index + await migrate() + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/completion.py b/examples/dsl/async/completion.py new file mode 100644 index 000000000..1c5929b2b --- /dev/null +++ b/examples/dsl/async/completion.py @@ -0,0 +1,114 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Example ``Document`` with completion suggester. + +In the ``Person`` class we index the person's name to allow auto completing in +any order ("first last", "middle last first", ...). For the weight we use a +value from the ``popularity`` field which is a long. + +To make the suggestions work in different languages we added a custom analyzer +that does ascii folding. 
+""" + +import asyncio +import os +from itertools import permutations +from typing import TYPE_CHECKING, Any, Dict, Optional + +from elasticsearch.dsl import ( + AsyncDocument, + Completion, + Keyword, + Long, + Text, + analyzer, + async_connections, + mapped_field, + token_filter, +) + +# custom analyzer for names +ascii_fold = analyzer( + "ascii_fold", + # we don't want to split O'Brian or Toulouse-Lautrec + tokenizer="whitespace", + filter=["lowercase", token_filter("ascii_fold", "asciifolding")], +) + + +class Person(AsyncDocument): + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: Optional[int] = mapped_field(default=None) + + name: str = mapped_field(Text(fields={"keyword": Keyword()}), default="") + popularity: int = mapped_field(Long(), default=0) + + # completion field with a custom analyzer + suggest: Dict[str, Any] = mapped_field(Completion(analyzer=ascii_fold), init=False) + + def clean(self) -> None: + """ + Automatically construct the suggestion input and weight by taking all + possible permutations of Person's name as ``input`` and taking their + popularity as ``weight``. + """ + self.suggest = { + "input": [" ".join(p) for p in permutations(self.name.split())], + "weight": self.popularity, + } + + class Index: + name = "test-suggest" + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +async def main() -> None: + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create the empty index + await Person.init() + + # index some sample data + for id, (name, popularity) in enumerate( + [("Henri de Toulouse-Lautrec", 42), ("Jára Cimrman", 124)] + ): + await Person(_id=id, name=name, popularity=popularity).save() + + # refresh index manually to make changes live + await Person._index.refresh() + + # run some suggestions + for text in ("já", "Jara Cimr", "tou", "de hen"): + s = Person.search() + s = s.suggest("auto_complete", text, completion={"field": "suggest"}) + response = await s.execute() + + # print out all the options we got + for option in response.suggest["auto_complete"][0].options: + print("%10s: %25s (%d)" % (text, option._source.name, option._score)) + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/composite_agg.py b/examples/dsl/async/composite_agg.py new file mode 100644 index 000000000..f9a7640a3 --- /dev/null +++ b/examples/dsl/async/composite_agg.py @@ -0,0 +1,94 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import asyncio +import os +from typing import Any, AsyncIterator, Dict, Mapping, Sequence, cast + +from elasticsearch.helpers import async_bulk + +from elasticsearch.dsl import Agg, AsyncSearch, Response, aggs, async_connections +from elasticsearch.dsl.types import CompositeAggregate +from test_elasticsearch.test_dsl.test_integration.test_data import DATA, GIT_INDEX + + +async def scan_aggs( + search: AsyncSearch, + source_aggs: Sequence[Mapping[str, Agg]], + inner_aggs: Dict[str, Agg] = {}, + size: int = 10, +) -> AsyncIterator[CompositeAggregate]: + """ + Helper function used to iterate over all possible bucket combinations of + ``source_aggs``, returning results of ``inner_aggs`` for each. Uses the + ``composite`` aggregation under the hood to perform this. + """ + + async def run_search(**kwargs: Any) -> Response: + s = search[:0] + bucket = s.aggs.bucket( + "comp", + aggs.Composite( + sources=source_aggs, + size=size, + **kwargs, + ), + ) + for agg_name, agg in inner_aggs.items(): + bucket[agg_name] = agg + return await s.execute() + + response = await run_search() + while response.aggregations["comp"].buckets: + for b in response.aggregations["comp"].buckets: + yield cast(CompositeAggregate, b) + if "after_key" in response.aggregations["comp"]: + after = response.aggregations["comp"].after_key + else: + after = response.aggregations["comp"].buckets[-1].key + response = await run_search(after=after) + + +async def main() -> None: + # initiate the default connection to elasticsearch + client = async_connections.create_connection( + hosts=[os.environ["ELASTICSEARCH_URL"]] + ) + + # create the index and populate it with some data + # note that the dataset is imported from the library's test suite + await client.indices.delete(index="git", ignore_unavailable=True) + await client.indices.create(index="git", **GIT_INDEX) + await async_bulk(client, DATA, raise_on_error=True, refresh=True) + + # run some aggregations on the data + async for b in scan_aggs( + AsyncSearch(index="git"), + [{"files": aggs.Terms(field="files")}], + {"first_seen": aggs.Min(field="committed_date")}, + ): + print( + "File %s has been modified %d times, first seen at %s." + % (b.key.files, b.doc_count, b.first_seen.value_as_string) + ) + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/parent_child.py b/examples/dsl/async/parent_child.py new file mode 100644 index 000000000..16dc6ebc3 --- /dev/null +++ b/examples/dsl/async/parent_child.py @@ -0,0 +1,276 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Complex data model example modeling stackoverflow-like data. 
+ +It is used to showcase several key features of elasticsearch-dsl: + + * Object and Nested fields: see User and Comment classes and fields they + are used in + + * method add_comment is used to add comments + + * Parent/Child relationship + + * See the Join field on Post creating the relationship between Question + and Answer + + * Meta.matches allows the hits from same index to be wrapped in proper + classes + + * to see how child objects are created see Question.add_answer + + * Question.search_answers shows how to query for children of a + particular parent + +""" +import asyncio +import os +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncSearch, + Date, + InnerDoc, + Join, + Keyword, + Long, + Text, + async_connections, + mapped_field, +) + + +class User(InnerDoc): + """ + Class used to represent a denormalized user stored on other objects. + """ + + id: int = mapped_field(Long()) + signed_up: Optional[datetime] = mapped_field(Date()) + username: str = mapped_field(Text(fields={"keyword": Keyword()})) + email: Optional[str] = mapped_field(Text(fields={"keyword": Keyword()})) + location: Optional[str] = mapped_field(Text(fields={"keyword": Keyword()})) + + +class Comment(InnerDoc): + """ + Class wrapper for nested comment objects. + """ + + author: User + created: datetime + content: str + + +class Post(AsyncDocument): + """ + Base class for Question and Answer containing the common fields. + """ + + author: User + + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _routing: str = mapped_field(default=None) + _id: Optional[int] = mapped_field(default=None) + + created: Optional[datetime] = mapped_field(default=None) + body: str = mapped_field(default="") + comments: List[Comment] = mapped_field(default_factory=list) + question_answer: Any = mapped_field( + Join(relations={"question": "answer"}), default_factory=dict + ) + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + # Post is an abstract class, make sure it never gets used for + # deserialization + return False + + class Index: + name = "test-qa-site" + settings = { + "number_of_shards": 1, + "number_of_replicas": 0, + } + + async def add_comment( + self, + user: User, + content: str, + created: Optional[datetime] = None, + commit: Optional[bool] = True, + ) -> Comment: + c = Comment(author=user, content=content, created=created or datetime.now()) + self.comments.append(c) + if commit: + await self.save() + return c + + async def save(self, **kwargs: Any) -> None: # type: ignore[override] + # if there is no date, use now + if self.created is None: + self.created = datetime.now() + await super().save(**kwargs) + + +class Question(Post): + tags: List[str] = mapped_field( + default_factory=list + ) # .tags will return empty list if not present + title: str = mapped_field(Text(fields={"keyword": Keyword()}), default="") + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + """Use Question class for parent documents""" + return bool(hit["_source"]["question_answer"] == "question") + + @classmethod + def search(cls, **kwargs: Any) -> AsyncSearch: # type: ignore[override] + return cls._index.search(**kwargs).filter("term", question_answer="question") + + async def add_answer( + self, + user: User, + body: str, + created: Optional[datetime] = None, + accepted: bool = False, + commit: Optional[bool] = True, + ) -> 
"Answer": + answer = Answer( + # required make sure the answer is stored in the same shard + _routing=self.meta.id, + # set up the parent/child mapping + question_answer={"name": "answer", "parent": self.meta.id}, + # pass in the field values + author=user, + created=created, + body=body, + is_accepted=accepted, + ) + if commit: + await answer.save() + return answer + + def search_answers(self) -> AsyncSearch: + # search only our index + s = Answer.search() + # filter for answers belonging to us + s = s.filter("parent_id", type="answer", id=self.meta.id) + # add routing to only go to specific shard + s = s.params(routing=self.meta.id) + return s + + async def get_answers(self) -> List[Any]: + """ + Get answers either from inner_hits already present or by searching + elasticsearch. + """ + if "inner_hits" in self.meta and "answer" in self.meta.inner_hits: + return cast(List[Any], self.meta.inner_hits["answer"].hits) + return [a async for a in self.search_answers()] + + async def save(self, **kwargs: Any) -> None: # type: ignore[override] + self.question_answer = "question" + await super().save(**kwargs) + + +class Answer(Post): + is_accepted: bool = mapped_field(default=False) + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + """Use Answer class for child documents with child name 'answer'""" + return ( + isinstance(hit["_source"]["question_answer"], dict) + and hit["_source"]["question_answer"].get("name") == "answer" + ) + + @classmethod + def search(cls, **kwargs: Any) -> AsyncSearch: # type: ignore[override] + return cls._index.search(**kwargs).exclude("term", question_answer="question") + + async def get_question(self) -> Optional[Question]: + # cache question in self.meta + # any attributes set on self would be interpreted as fields + if "question" not in self.meta: + self.meta.question = await Question.get( + id=self.question_answer.parent, index=self.meta.index + ) + return cast(Optional[Question], self.meta.question) + + async def save(self, **kwargs: Any) -> None: # type: ignore[override] + # set routing to parents id automatically + self.meta.routing = self.question_answer.parent + await super().save(**kwargs) + + +async def setup() -> None: + """Create an IndexTemplate and save it into elasticsearch.""" + index_template = Post._index.as_composable_template("base", priority=100) + await index_template.save() + + +async def main() -> Answer: + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create index + await setup() + + # user objects to use + nick = User( + id=47, + signed_up=datetime(2017, 4, 3), + username="fxdgear", + email="nick.lang@elastic.co", + location="Colorado", + ) + honza = User( + id=42, + signed_up=datetime(2013, 4, 3), + username="honzakral", + email="honza@elastic.co", + location="Prague", + ) + + # create a question object + question = Question( + _id=1, + author=nick, + tags=["elasticsearch", "python"], + title="How do I use elasticsearch from Python?", + body=""" + I want to use elasticsearch, how do I do it from Python? 
+ """, + ) + await question.save() + answer = await question.add_answer(honza, "Just use `elasticsearch-py`!") + + # close the connection + await async_connections.get_connection().close() + + return answer + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/percolate.py b/examples/dsl/async/percolate.py new file mode 100644 index 000000000..75350bbed --- /dev/null +++ b/examples/dsl/async/percolate.py @@ -0,0 +1,117 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import asyncio +import os +from typing import TYPE_CHECKING, Any, List, Optional + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncSearch, + Keyword, + Percolator, + Q, + Query, + async_connections, + mapped_field, +) + + +class BlogPost(AsyncDocument): + """ + Blog posts that will be automatically tagged based on percolation queries. + """ + + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: int + + content: Optional[str] + tags: List[str] = mapped_field(Keyword(), default_factory=list) + + class Index: + name = "test-blogpost" + + async def add_tags(self) -> None: + # run a percolation to automatically tag the blog post. + s = AsyncSearch(index="test-percolator") + s = s.query( + "percolate", field="query", index=self._get_index(), document=self.to_dict() + ) + + # collect all the tags from matched percolators + async for percolator in s: + self.tags.extend(percolator.tags) + + # make sure tags are unique + self.tags = list(set(self.tags)) + + async def save(self, **kwargs: Any) -> None: # type: ignore[override] + await self.add_tags() + await super().save(**kwargs) + + +class PercolatorDoc(AsyncDocument): + """ + Document class used for storing the percolation queries. + """ + + if TYPE_CHECKING: + _id: str + + # relevant fields from BlogPost must be also present here for the queries + # to be able to use them. Another option would be to use document + # inheritance but save() would have to be reset to normal behavior. 
+ content: Optional[str] + + # the percolator query to be run against the doc + query: Query = mapped_field(Percolator()) + # list of tags to append to a document + tags: List[str] = mapped_field(Keyword(multi=True)) + + class Index: + name = "test-percolator" + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +async def setup() -> None: + # create the percolator index if it doesn't exist + if not await PercolatorDoc._index.exists(): + await PercolatorDoc.init() + + # register a percolation query looking for documents about python + await PercolatorDoc( + _id="python", + tags=["programming", "development", "python"], + content="", + query=Q("match", content="python"), + ).save(refresh=True) + + +async def main() -> None: + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + await setup() + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/search_as_you_type.py b/examples/dsl/async/search_as_you_type.py new file mode 100644 index 000000000..5919f3e3f --- /dev/null +++ b/examples/dsl/async/search_as_you_type.py @@ -0,0 +1,99 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Example ``Document`` with search_as_you_type field datatype and how to search it. + +When creating a field with search_as_you_type datatype ElasticSearch creates additional +subfields to enable efficient as-you-type completion, matching terms at any position +within the input. + +To custom analyzer with ascii folding allow search to work in different languages. 
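+
+Because the ``name`` field below is mapped with ``max_shingle_size=3``, the
+example queries the generated subfields with a ``bool_prefix`` multi-match,
+roughly::
+
+    MultiMatch(
+        query="Henri Tou",
+        type="bool_prefix",
+        fields=["name", "name._2gram", "name._3gram"],
+    )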
+""" + +import asyncio +import os +from typing import TYPE_CHECKING, Optional + +from elasticsearch.dsl import ( + AsyncDocument, + SearchAsYouType, + async_connections, + mapped_field, +) +from elasticsearch.dsl.query import MultiMatch + + +class Person(AsyncDocument): + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: Optional[int] = mapped_field(default=None) + + name: str = mapped_field(SearchAsYouType(max_shingle_size=3), default="") + + class Index: + name = "test-search-as-you-type" + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +async def main() -> None: + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create the empty index + await Person.init() + + import pprint + + pprint.pprint(Person().to_dict(), indent=2) + + # index some sample data + names = [ + "Andy Warhol", + "Alphonse Mucha", + "Henri de Toulouse-Lautrec", + "Jára Cimrman", + ] + for id, name in enumerate(names): + await Person(_id=id, name=name).save() + + # refresh index manually to make changes live + await Person._index.refresh() + + # run some suggestions + for text in ("já", "Cimr", "toulouse", "Henri Tou", "a"): + s = Person.search() + + s.query = MultiMatch( # type: ignore[assignment] + query=text, + type="bool_prefix", + fields=["name", "name._2gram", "name._3gram"], + ) + + response = await s.execute() + + # print out all the options we got + for h in response: + print("%15s: %25s" % (text, h.name)) + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/semantic_text.py b/examples/dsl/async/semantic_text.py new file mode 100644 index 000000000..0c416067b --- /dev/null +++ b/examples/dsl/async/semantic_text.py @@ -0,0 +1,148 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +""" +# Semantic Text example + +Requirements: + +$ pip install "elasticsearch-dsl[async]" tqdm + +Before running this example, an ELSER inference endpoint must be created in the +Elasticsearch cluster. This can be done manually from Kibana, or with the +following curl command from a terminal: + +curl -X PUT \ + "$ELASTICSEARCH_URL/_inference/sparse_embedding/my-elser-endpoint" \ + -H "Content-Type: application/json" \ + -d '{"service":"elser","service_settings":{"num_allocations":1,"num_threads":1}}' + +To run the example: + +$ python semantic_text.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to the command to regenerate it. + +The example dataset includes a selection of workplace documents. 
The +following are good example queries to try out with this dataset: + +$ python semantic_text.py "work from home" +$ python semantic_text.py "vacation time" +$ python semantic_text.py "can I bring a bird to work?" + +When the index is created, the inference service will split the documents into +short passages, and for each passage a sparse embedding will be generated using +Elastic's ELSER v2 model. +""" + +import argparse +import asyncio +import json +import os +from datetime import datetime +from typing import Any, Optional +from urllib.request import urlopen + +from tqdm import tqdm + +from elasticsearch import dsl + +DATASET_URL = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json" + + +class WorkplaceDoc(dsl.AsyncDocument): + class Index: + name = "workplace_documents_semantic" + + name: str + summary: str + content: Any = dsl.mapped_field( + dsl.field.SemanticText(inference_id="my-elser-endpoint") + ) + created: datetime + updated: Optional[datetime] + url: str = dsl.mapped_field(dsl.Keyword()) + category: str = dsl.mapped_field(dsl.Keyword()) + + +async def create() -> None: + + # create the index + await WorkplaceDoc._index.delete(ignore_unavailable=True) + await WorkplaceDoc.init() + + # download the data + dataset = json.loads(urlopen(DATASET_URL).read()) + + # import the dataset + for data in tqdm(dataset, desc="Indexing documents..."): + doc = WorkplaceDoc( + name=data["name"], + summary=data["summary"], + content=data["content"], + created=data.get("created_on"), + updated=data.get("updated_at"), + url=data["url"], + category=data["category"], + ) + await doc.save() + + # refresh the index + await WorkplaceDoc._index.refresh() + + +async def search(query: str) -> dsl.AsyncSearch[WorkplaceDoc]: + search = WorkplaceDoc.search() + search = search[:5] + return search.query(dsl.query.Semantic(field=WorkplaceDoc.content, query=query)) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Vector database with Elasticsearch") + parser.add_argument( + "--recreate-index", action="store_true", help="Recreate and populate the index" + ) + parser.add_argument("query", action="store", help="The search query") + return parser.parse_args() + + +async def main() -> None: + args = parse_args() + + # initiate the default connection to elasticsearch + dsl.async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + if args.recreate_index or not await WorkplaceDoc._index.exists(): + await create() + + results = await search(args.query) + + async for hit in results: + print( + f"Document: {hit.name} [Category: {hit.category}] [Score: {hit.meta.score}]" + ) + print(f"Content: {hit.content.text}") + print("--------------------\n") + + # close the connection + await dsl.async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/sparse_vectors.py b/examples/dsl/async/sparse_vectors.py new file mode 100644 index 000000000..86d99bfff --- /dev/null +++ b/examples/dsl/async/sparse_vectors.py @@ -0,0 +1,198 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +# Sparse vector database example + +Requirements: + +$ pip install nltk tqdm elasticsearch-dsl[async] + +Before running this example, the ELSER v2 model must be downloaded and deployed +to the Elasticsearch cluster, and an ingest pipeline must be defined. This can +be done manually from Kibana, or with the following three curl commands from a +terminal, adjusting the endpoint as needed: + +curl -X PUT \ + "http://localhost:9200/_ml/trained_models/.elser_model_2?wait_for_completion" \ + -H "Content-Type: application/json" \ + -d '{"input":{"field_names":["text_field"]}}' +curl -X POST \ + "http://localhost:9200/_ml/trained_models/.elser_model_2/deployment/_start?wait_for=fully_allocated" +curl -X PUT \ + "http://localhost:9200/_ingest/pipeline/elser_ingest_pipeline" \ + -H "Content-Type: application/json" \ + -d '{"processors":[{"foreach":{"field":"passages","processor":{"inference":{"model_id":".elser_model_2","input_output":[{"input_field":"_ingest._value.content","output_field":"_ingest._value.embedding"}]}}}}]}' + +To run the example: + +$ python sparse_vectors.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to regenerate it. + +The example dataset includes a selection of workplace documents. The +following are good example queries to try out with this dataset: + +$ python sparse_vectors.py "work from home" +$ python sparse_vectors.py "vacation time" +$ python sparse_vectors.py "can I bring a bird to work?" + +When the index is created, the documents are split into short passages, and for +each passage a sparse embedding is generated using Elastic's ELSER v2 model. +The documents that are returned as search results are those that have the +highest scored passages. Add `--show-inner-hits` to the command to see +individual passage results as well. 
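+
+Conceptually, each indexed document then stores its passages as nested objects
+whose ``embedding`` is a mapping of token to weight written by the ingest
+pipeline; a single passage might look roughly like this (the values are made
+up for illustration)::
+
+    {
+        "content": "Employees may work from home two days a week.",
+        "embedding": {"work": 1.92, "home": 1.71, "office": 0.43}
+    }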
+""" + +import argparse +import asyncio +import json +import os +from datetime import datetime +from typing import Any, Dict, List, Optional +from urllib.request import urlopen + +import nltk # type: ignore +from tqdm import tqdm + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncSearch, + InnerDoc, + Keyword, + Q, + SparseVector, + async_connections, + mapped_field, +) + +DATASET_URL = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json" + +# initialize sentence tokenizer +nltk.download("punkt_tab", quiet=True) + + +class Passage(InnerDoc): + content: Optional[str] + embedding: Dict[str, float] = mapped_field(SparseVector(), init=False) + + +class WorkplaceDoc(AsyncDocument): + class Index: + name = "workplace_documents_sparse" + settings = {"default_pipeline": "elser_ingest_pipeline"} + + name: str + summary: str + content: str + created: datetime + updated: Optional[datetime] + url: str = mapped_field(Keyword()) + category: str = mapped_field(Keyword()) + passages: List[Passage] = mapped_field(default=[]) + + _model: Any = None + + def clean(self) -> None: + # split the content into sentences + passages = nltk.sent_tokenize(self.content) + + # generate an embedding for each passage and save it as a nested document + for passage in passages: + self.passages.append(Passage(content=passage)) + + +async def create() -> None: + + # create the index + await WorkplaceDoc._index.delete(ignore_unavailable=True) + await WorkplaceDoc.init() + + # download the data + dataset = json.loads(urlopen(DATASET_URL).read()) + + # import the dataset + for data in tqdm(dataset, desc="Indexing documents..."): + doc = WorkplaceDoc( + name=data["name"], + summary=data["summary"], + content=data["content"], + created=data.get("created_on"), + updated=data.get("updated_at"), + url=data["url"], + category=data["category"], + ) + await doc.save() + + +async def search(query: str) -> AsyncSearch[WorkplaceDoc]: + return WorkplaceDoc.search()[:5].query( + "nested", + path="passages", + query=Q( + "text_expansion", + passages__content={ + "model_id": ".elser_model_2", + "model_text": query, + }, + ), + inner_hits={"size": 2}, + ) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Vector database with Elasticsearch") + parser.add_argument( + "--recreate-index", action="store_true", help="Recreate and populate the index" + ) + parser.add_argument( + "--show-inner-hits", + action="store_true", + help="Show results for individual passages", + ) + parser.add_argument("query", action="store", help="The search query") + return parser.parse_args() + + +async def main() -> None: + args = parse_args() + + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + if args.recreate_index or not await WorkplaceDoc._index.exists(): + await create() + + results = await search(args.query) + + async for hit in results: + print( + f"Document: {hit.name} [Category: {hit.category}] [Score: {hit.meta.score}]" + ) + print(f"Summary: {hit.summary}") + if args.show_inner_hits: + for passage in hit.meta.inner_hits["passages"]: + print(f" - [Score: {passage.meta.score}] {passage.content!r}") + print("") + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/async/vectors.py b/examples/dsl/async/vectors.py new file mode 100644 index 000000000..62fbfe3f5 --- /dev/null +++ 
b/examples/dsl/async/vectors.py @@ -0,0 +1,187 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +# Vector database example + +Requirements: + +$ pip install nltk sentence_transformers tqdm elasticsearch-dsl[async] + +To run the example: + +$ python vectors.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to regenerate it. + +The example dataset includes a selection of workplace documents. The +following are good example queries to try out with this dataset: + +$ python vectors.py "work from home" +$ python vectors.py "vacation time" +$ python vectors.py "can I bring a bird to work?" + +When the index is created, the documents are split into short passages, and for +each passage an embedding is generated using the open source +"all-MiniLM-L6-v2" model. The documents that are returned as search results are +those that have the highest scored passages. Add `--show-inner-hits` to the +command to see individual passage results as well. +""" + +import argparse +import asyncio +import json +import os +from datetime import datetime +from typing import Any, List, Optional, cast +from urllib.request import urlopen + +import nltk # type: ignore +from sentence_transformers import SentenceTransformer +from tqdm import tqdm + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncSearch, + DenseVector, + InnerDoc, + Keyword, + M, + async_connections, + mapped_field, +) + +DATASET_URL = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json" +MODEL_NAME = "all-MiniLM-L6-v2" + +# initialize sentence tokenizer +nltk.download("punkt_tab", quiet=True) + +# this will be the embedding model +embedding_model: Any = None + + +class Passage(InnerDoc): + content: str + embedding: List[float] = mapped_field(DenseVector()) + + +class WorkplaceDoc(AsyncDocument): + class Index: + name = "workplace_documents" + + name: str + summary: str + content: str + created: datetime + updated: Optional[datetime] + url: str = mapped_field(Keyword(required=True)) + category: str = mapped_field(Keyword(required=True)) + passages: M[List[Passage]] = mapped_field(default=[]) + + @classmethod + def get_embedding(cls, input: str) -> List[float]: + global embedding_model + if embedding_model is None: + embedding_model = SentenceTransformer(MODEL_NAME) + return cast(List[float], list(embedding_model.encode(input))) + + def clean(self) -> None: + # split the content into sentences + passages = cast(List[str], nltk.sent_tokenize(self.content)) + + # generate an embedding for each passage and save it as a nested document + for passage in passages: + self.passages.append( + Passage(content=passage, embedding=self.get_embedding(passage)) + ) + + +async def create() -> None: + # create the index + await 
WorkplaceDoc._index.delete(ignore_unavailable=True) + await WorkplaceDoc.init() + + # download the data + dataset = json.loads(urlopen(DATASET_URL).read()) + + # import the dataset + for data in tqdm(dataset, desc="Indexing documents..."): + doc = WorkplaceDoc( + name=data["name"], + summary=data["summary"], + content=data["content"], + created=data.get("created_on"), + updated=data.get("updated_at"), + url=data["url"], + category=data["category"], + ) + await doc.save() + + +async def search(query: str) -> AsyncSearch[WorkplaceDoc]: + return WorkplaceDoc.search().knn( + field=WorkplaceDoc.passages.embedding, + k=5, + num_candidates=50, + query_vector=list(WorkplaceDoc.get_embedding(query)), + inner_hits={"size": 2}, + ) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Vector database with Elasticsearch") + parser.add_argument( + "--recreate-index", action="store_true", help="Recreate and populate the index" + ) + parser.add_argument( + "--show-inner-hits", + action="store_true", + help="Show results for individual passages", + ) + parser.add_argument("query", action="store", help="The search query") + return parser.parse_args() + + +async def main() -> None: + args = parse_args() + + # initiate the default connection to elasticsearch + async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + if args.recreate_index or not await WorkplaceDoc._index.exists(): + await create() + + results = await search(args.query) + + async for hit in results: + print( + f"Document: {hit.name} [Category: {hit.category}] [Score: {hit.meta.score}]" + ) + print(f"Summary: {hit.summary}") + if args.show_inner_hits: + for passage in hit.meta.inner_hits["passages"]: + print(f" - [Score: {passage.meta.score}] {passage.content!r}") + print("") + + # close the connection + await async_connections.get_connection().close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/dsl/completion.py b/examples/dsl/completion.py new file mode 100644 index 000000000..3380dc520 --- /dev/null +++ b/examples/dsl/completion.py @@ -0,0 +1,113 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Example ``Document`` with completion suggester. + +In the ``Person`` class we index the person's name to allow auto completing in +any order ("first last", "middle last first", ...). For the weight we use a +value from the ``popularity`` field which is a long. + +To make the suggestions work in different languages we added a custom analyzer +that does ascii folding. 
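+ +For example, the sample person "Henri de Toulouse-Lautrec" with popularity 42 that is +indexed in ``main()`` below ends up with six suggestion inputs (one per permutation of its +three whitespace-separated name tokens), each carrying weight 42, which is why queries +such as "tou" or "de hen" can complete to the full name.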
+""" + +import os +from itertools import permutations +from typing import TYPE_CHECKING, Any, Dict, Optional + +from elasticsearch.dsl import ( + Completion, + Document, + Keyword, + Long, + Text, + analyzer, + connections, + mapped_field, + token_filter, +) + +# custom analyzer for names +ascii_fold = analyzer( + "ascii_fold", + # we don't want to split O'Brian or Toulouse-Lautrec + tokenizer="whitespace", + filter=["lowercase", token_filter("ascii_fold", "asciifolding")], +) + + +class Person(Document): + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: Optional[int] = mapped_field(default=None) + + name: str = mapped_field(Text(fields={"keyword": Keyword()}), default="") + popularity: int = mapped_field(Long(), default=0) + + # completion field with a custom analyzer + suggest: Dict[str, Any] = mapped_field(Completion(analyzer=ascii_fold), init=False) + + def clean(self) -> None: + """ + Automatically construct the suggestion input and weight by taking all + possible permutations of Person's name as ``input`` and taking their + popularity as ``weight``. + """ + self.suggest = { + "input": [" ".join(p) for p in permutations(self.name.split())], + "weight": self.popularity, + } + + class Index: + name = "test-suggest" + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +def main() -> None: + # initiate the default connection to elasticsearch + connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create the empty index + Person.init() + + # index some sample data + for id, (name, popularity) in enumerate( + [("Henri de Toulouse-Lautrec", 42), ("Jára Cimrman", 124)] + ): + Person(_id=id, name=name, popularity=popularity).save() + + # refresh index manually to make changes live + Person._index.refresh() + + # run some suggestions + for text in ("já", "Jara Cimr", "tou", "de hen"): + s = Person.search() + s = s.suggest("auto_complete", text, completion={"field": "suggest"}) + response = s.execute() + + # print out all the options we got + for option in response.suggest["auto_complete"][0].options: + print("%10s: %25s (%d)" % (text, option._source.name, option._score)) + + # close the connection + connections.get_connection().close() + + +if __name__ == "__main__": + main() diff --git a/examples/dsl/composite_agg.py b/examples/dsl/composite_agg.py new file mode 100644 index 000000000..6710222b8 --- /dev/null +++ b/examples/dsl/composite_agg.py @@ -0,0 +1,91 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import os +from typing import Any, Dict, Iterator, Mapping, Sequence, cast + +from elasticsearch.helpers import bulk + +from elasticsearch.dsl import Agg, Response, Search, aggs, connections +from elasticsearch.dsl.types import CompositeAggregate +from test_elasticsearch.test_dsl.test_integration.test_data import DATA, GIT_INDEX + + +def scan_aggs( + search: Search, + source_aggs: Sequence[Mapping[str, Agg]], + inner_aggs: Dict[str, Agg] = {}, + size: int = 10, +) -> Iterator[CompositeAggregate]: + """ + Helper function used to iterate over all possible bucket combinations of + ``source_aggs``, returning results of ``inner_aggs`` for each. Uses the + ``composite`` aggregation under the hood to perform this. + """ + + def run_search(**kwargs: Any) -> Response: + s = search[:0] + bucket = s.aggs.bucket( + "comp", + aggs.Composite( + sources=source_aggs, + size=size, + **kwargs, + ), + ) + for agg_name, agg in inner_aggs.items(): + bucket[agg_name] = agg + return s.execute() + + response = run_search() + while response.aggregations["comp"].buckets: + for b in response.aggregations["comp"].buckets: + yield cast(CompositeAggregate, b) + if "after_key" in response.aggregations["comp"]: + after = response.aggregations["comp"].after_key + else: + after = response.aggregations["comp"].buckets[-1].key + response = run_search(after=after) + + +def main() -> None: + # initiate the default connection to elasticsearch + client = connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create the index and populate it with some data + # note that the dataset is imported from the library's test suite + client.indices.delete(index="git", ignore_unavailable=True) + client.indices.create(index="git", **GIT_INDEX) + bulk(client, DATA, raise_on_error=True, refresh=True) + + # run some aggregations on the data + for b in scan_aggs( + Search(index="git"), + [{"files": aggs.Terms(field="files")}], + {"first_seen": aggs.Min(field="committed_date")}, + ): + print( + "File %s has been modified %d times, first seen at %s." + % (b.key.files, b.doc_count, b.first_seen.value_as_string) + ) + + # close the connection + connections.get_connection().close() + + +if __name__ == "__main__": + main() diff --git a/examples/dsl/parent_child.py b/examples/dsl/parent_child.py new file mode 100644 index 000000000..22c597464 --- /dev/null +++ b/examples/dsl/parent_child.py @@ -0,0 +1,275 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Complex data model example modeling stackoverflow-like data. 
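+Questions and answers are stored together in a single index (``test-qa-site``) and linked +with a ``Join`` field; each answer is routed to its question's shard through the +``_routing`` meta field, so the ``parent_id`` queries used below stay on a single shard.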
+ +It is used to showcase several key features of elasticsearch-dsl: + + * Object and Nested fields: see User and Comment classes and fields they + are used in + + * method add_comment is used to add comments + + * Parent/Child relationship + + * See the Join field on Post creating the relationship between Question + and Answer + + * Meta.matches allows the hits from same index to be wrapped in proper + classes + + * to see how child objects are created see Question.add_answer + + * Question.search_answers shows how to query for children of a + particular parent + +""" +import os +from datetime import datetime +from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast + +from elasticsearch.dsl import ( + Date, + Document, + InnerDoc, + Join, + Keyword, + Long, + Search, + Text, + connections, + mapped_field, +) + + +class User(InnerDoc): + """ + Class used to represent a denormalized user stored on other objects. + """ + + id: int = mapped_field(Long()) + signed_up: Optional[datetime] = mapped_field(Date()) + username: str = mapped_field(Text(fields={"keyword": Keyword()})) + email: Optional[str] = mapped_field(Text(fields={"keyword": Keyword()})) + location: Optional[str] = mapped_field(Text(fields={"keyword": Keyword()})) + + +class Comment(InnerDoc): + """ + Class wrapper for nested comment objects. + """ + + author: User + created: datetime + content: str + + +class Post(Document): + """ + Base class for Question and Answer containing the common fields. + """ + + author: User + + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _routing: str = mapped_field(default=None) + _id: Optional[int] = mapped_field(default=None) + + created: Optional[datetime] = mapped_field(default=None) + body: str = mapped_field(default="") + comments: List[Comment] = mapped_field(default_factory=list) + question_answer: Any = mapped_field( + Join(relations={"question": "answer"}), default_factory=dict + ) + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + # Post is an abstract class, make sure it never gets used for + # deserialization + return False + + class Index: + name = "test-qa-site" + settings = { + "number_of_shards": 1, + "number_of_replicas": 0, + } + + def add_comment( + self, + user: User, + content: str, + created: Optional[datetime] = None, + commit: Optional[bool] = True, + ) -> Comment: + c = Comment(author=user, content=content, created=created or datetime.now()) + self.comments.append(c) + if commit: + self.save() + return c + + def save(self, **kwargs: Any) -> None: # type: ignore[override] + # if there is no date, use now + if self.created is None: + self.created = datetime.now() + super().save(**kwargs) + + +class Question(Post): + tags: List[str] = mapped_field( + default_factory=list + ) # .tags will return empty list if not present + title: str = mapped_field(Text(fields={"keyword": Keyword()}), default="") + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + """Use Question class for parent documents""" + return bool(hit["_source"]["question_answer"] == "question") + + @classmethod + def search(cls, **kwargs: Any) -> Search: # type: ignore[override] + return cls._index.search(**kwargs).filter("term", question_answer="question") + + def add_answer( + self, + user: User, + body: str, + created: Optional[datetime] = None, + accepted: bool = False, + commit: Optional[bool] = True, + ) -> "Answer": + answer = Answer( + # required make sure the answer is stored in 
the same shard + _routing=self.meta.id, + # set up the parent/child mapping + question_answer={"name": "answer", "parent": self.meta.id}, + # pass in the field values + author=user, + created=created, + body=body, + is_accepted=accepted, + ) + if commit: + answer.save() + return answer + + def search_answers(self) -> Search: + # search only our index + s = Answer.search() + # filter for answers belonging to us + s = s.filter("parent_id", type="answer", id=self.meta.id) + # add routing to only go to specific shard + s = s.params(routing=self.meta.id) + return s + + def get_answers(self) -> List[Any]: + """ + Get answers either from inner_hits already present or by searching + elasticsearch. + """ + if "inner_hits" in self.meta and "answer" in self.meta.inner_hits: + return cast(List[Any], self.meta.inner_hits["answer"].hits) + return [a for a in self.search_answers()] + + def save(self, **kwargs: Any) -> None: # type: ignore[override] + self.question_answer = "question" + super().save(**kwargs) + + +class Answer(Post): + is_accepted: bool = mapped_field(default=False) + + @classmethod + def _matches(cls, hit: Dict[str, Any]) -> bool: + """Use Answer class for child documents with child name 'answer'""" + return ( + isinstance(hit["_source"]["question_answer"], dict) + and hit["_source"]["question_answer"].get("name") == "answer" + ) + + @classmethod + def search(cls, **kwargs: Any) -> Search: # type: ignore[override] + return cls._index.search(**kwargs).exclude("term", question_answer="question") + + def get_question(self) -> Optional[Question]: + # cache question in self.meta + # any attributes set on self would be interpreted as fields + if "question" not in self.meta: + self.meta.question = Question.get( + id=self.question_answer.parent, index=self.meta.index + ) + return cast(Optional[Question], self.meta.question) + + def save(self, **kwargs: Any) -> None: # type: ignore[override] + # set routing to parents id automatically + self.meta.routing = self.question_answer.parent + super().save(**kwargs) + + +def setup() -> None: + """Create an IndexTemplate and save it into elasticsearch.""" + index_template = Post._index.as_composable_template("base", priority=100) + index_template.save() + + +def main() -> Answer: + # initiate the default connection to elasticsearch + connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create index + setup() + + # user objects to use + nick = User( + id=47, + signed_up=datetime(2017, 4, 3), + username="fxdgear", + email="nick.lang@elastic.co", + location="Colorado", + ) + honza = User( + id=42, + signed_up=datetime(2013, 4, 3), + username="honzakral", + email="honza@elastic.co", + location="Prague", + ) + + # create a question object + question = Question( + _id=1, + author=nick, + tags=["elasticsearch", "python"], + title="How do I use elasticsearch from Python?", + body=""" + I want to use elasticsearch, how do I do it from Python? + """, + ) + question.save() + answer = question.add_answer(honza, "Just use `elasticsearch-py`!") + + # close the connection + connections.get_connection().close() + + return answer + + +if __name__ == "__main__": + main() diff --git a/examples/dsl/percolate.py b/examples/dsl/percolate.py new file mode 100644 index 000000000..d8747feda --- /dev/null +++ b/examples/dsl/percolate.py @@ -0,0 +1,116 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. 
Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +from typing import TYPE_CHECKING, Any, List, Optional + +from elasticsearch.dsl import ( + Document, + Keyword, + Percolator, + Q, + Query, + Search, + connections, + mapped_field, +) + + +class BlogPost(Document): + """ + Blog posts that will be automatically tagged based on percolation queries. + """ + + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: int + + content: Optional[str] + tags: List[str] = mapped_field(Keyword(), default_factory=list) + + class Index: + name = "test-blogpost" + + def add_tags(self) -> None: + # run a percolation to automatically tag the blog post. + s = Search(index="test-percolator") + s = s.query( + "percolate", field="query", index=self._get_index(), document=self.to_dict() + ) + + # collect all the tags from matched percolators + for percolator in s: + self.tags.extend(percolator.tags) + + # make sure tags are unique + self.tags = list(set(self.tags)) + + def save(self, **kwargs: Any) -> None: # type: ignore[override] + self.add_tags() + super().save(**kwargs) + + +class PercolatorDoc(Document): + """ + Document class used for storing the percolation queries. + """ + + if TYPE_CHECKING: + _id: str + + # relevant fields from BlogPost must be also present here for the queries + # to be able to use them. Another option would be to use document + # inheritance but save() would have to be reset to normal behavior. + content: Optional[str] + + # the percolator query to be run against the doc + query: Query = mapped_field(Percolator()) + # list of tags to append to a document + tags: List[str] = mapped_field(Keyword(multi=True)) + + class Index: + name = "test-percolator" + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +def setup() -> None: + # create the percolator index if it doesn't exist + if not PercolatorDoc._index.exists(): + PercolatorDoc.init() + + # register a percolation query looking for documents about python + PercolatorDoc( + _id="python", + tags=["programming", "development", "python"], + content="", + query=Q("match", content="python"), + ).save(refresh=True) + + +def main() -> None: + # initiate the default connection to elasticsearch + connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + setup() + + # close the connection + connections.get_connection().close() + + +if __name__ == "__main__": + main() diff --git a/examples/dsl/search_as_you_type.py b/examples/dsl/search_as_you_type.py new file mode 100644 index 000000000..c1ebc99a4 --- /dev/null +++ b/examples/dsl/search_as_you_type.py @@ -0,0 +1,93 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. 
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Example ``Document`` with search_as_you_type field datatype and how to search it. + +When creating a field with the search_as_you_type datatype, Elasticsearch creates additional +subfields to enable efficient as-you-type completion, matching terms at any position +within the input. + +A custom analyzer with ASCII folding (as in the ``completion.py`` example) can be added +to make searches work across different languages. +""" + +import os +from typing import TYPE_CHECKING, Optional + +from elasticsearch.dsl import Document, SearchAsYouType, connections, mapped_field +from elasticsearch.dsl.query import MultiMatch + + +class Person(Document): + if TYPE_CHECKING: + # definitions here help type checkers understand additional arguments + # that are allowed in the constructor + _id: Optional[int] = mapped_field(default=None) + + name: str = mapped_field(SearchAsYouType(max_shingle_size=3), default="") + + class Index: + name = "test-search-as-you-type" + settings = {"number_of_shards": 1, "number_of_replicas": 0} + + +def main() -> None: + # initiate the default connection to elasticsearch + connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create the empty index + Person.init() + + import pprint + + pprint.pprint(Person().to_dict(), indent=2) + + # index some sample data + names = [ + "Andy Warhol", + "Alphonse Mucha", + "Henri de Toulouse-Lautrec", + "Jára Cimrman", + ] + for id, name in enumerate(names): + Person(_id=id, name=name).save() + + # refresh index manually to make changes live + Person._index.refresh() + + # run some suggestions + for text in ("já", "Cimr", "toulouse", "Henri Tou", "a"): + s = Person.search() + + s.query = MultiMatch( # type: ignore[assignment] + query=text, + type="bool_prefix", + fields=["name", "name._2gram", "name._3gram"], + ) + + response = s.execute() + + # print out all the options we got + for h in response: + print("%15s: %25s" % (text, h.name)) + + # close the connection + connections.get_connection().close() + + +if __name__ == "__main__": + main() diff --git a/examples/dsl/semantic_text.py b/examples/dsl/semantic_text.py new file mode 100644 index 000000000..aff2d8097 --- /dev/null +++ b/examples/dsl/semantic_text.py @@ -0,0 +1,147 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
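+ +# Note: the curl command in the docstring below creates the ELSER inference endpoint as +# "my-elser-endpoint"; this name must match the ``inference_id`` passed to the +# ``SemanticText`` field of ``WorkplaceDoc`` further down.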
+ + +""" +# Semantic Text example + +Requirements: + +$ pip install "elasticsearch-dsl" tqdm + +Before running this example, an ELSER inference endpoint must be created in the +Elasticsearch cluster. This can be done manually from Kibana, or with the +following curl command from a terminal: + +curl -X PUT \ + "$ELASTICSEARCH_URL/_inference/sparse_embedding/my-elser-endpoint" \ + -H "Content-Type: application/json" \ + -d '{"service":"elser","service_settings":{"num_allocations":1,"num_threads":1}}' + +To run the example: + +$ python semantic_text.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to the command to regenerate it. + +The example dataset includes a selection of workplace documents. The +following are good example queries to try out with this dataset: + +$ python semantic_text.py "work from home" +$ python semantic_text.py "vacation time" +$ python semantic_text.py "can I bring a bird to work?" + +When the index is created, the inference service will split the documents into +short passages, and for each passage a sparse embedding will be generated using +Elastic's ELSER v2 model. +""" + +import argparse +import json +import os +from datetime import datetime +from typing import Any, Optional +from urllib.request import urlopen + +from tqdm import tqdm + +from elasticsearch import dsl + +DATASET_URL = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json" + + +class WorkplaceDoc(dsl.Document): + class Index: + name = "workplace_documents_semantic" + + name: str + summary: str + content: Any = dsl.mapped_field( + dsl.field.SemanticText(inference_id="my-elser-endpoint") + ) + created: datetime + updated: Optional[datetime] + url: str = dsl.mapped_field(dsl.Keyword()) + category: str = dsl.mapped_field(dsl.Keyword()) + + +def create() -> None: + + # create the index + WorkplaceDoc._index.delete(ignore_unavailable=True) + WorkplaceDoc.init() + + # download the data + dataset = json.loads(urlopen(DATASET_URL).read()) + + # import the dataset + for data in tqdm(dataset, desc="Indexing documents..."): + doc = WorkplaceDoc( + name=data["name"], + summary=data["summary"], + content=data["content"], + created=data.get("created_on"), + updated=data.get("updated_at"), + url=data["url"], + category=data["category"], + ) + doc.save() + + # refresh the index + WorkplaceDoc._index.refresh() + + +def search(query: str) -> dsl.Search[WorkplaceDoc]: + search = WorkplaceDoc.search() + search = search[:5] + return search.query(dsl.query.Semantic(field=WorkplaceDoc.content, query=query)) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Vector database with Elasticsearch") + parser.add_argument( + "--recreate-index", action="store_true", help="Recreate and populate the index" + ) + parser.add_argument("query", action="store", help="The search query") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + + # initiate the default connection to elasticsearch + dsl.connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + if args.recreate_index or not WorkplaceDoc._index.exists(): + create() + + results = search(args.query) + + for hit in results: + print( + f"Document: {hit.name} [Category: {hit.category}] [Score: {hit.meta.score}]" + ) + print(f"Content: {hit.content.text}") + print("--------------------\n") + + # close the connection + dsl.connections.get_connection().close() + + +if __name__ == "__main__": + 
main() diff --git a/examples/dsl/sparse_vectors.py b/examples/dsl/sparse_vectors.py new file mode 100644 index 000000000..01bb99178 --- /dev/null +++ b/examples/dsl/sparse_vectors.py @@ -0,0 +1,197 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +# Sparse vector database example + +Requirements: + +$ pip install nltk tqdm elasticsearch-dsl + +Before running this example, the ELSER v2 model must be downloaded and deployed +to the Elasticsearch cluster, and an ingest pipeline must be defined. This can +be done manually from Kibana, or with the following three curl commands from a +terminal, adjusting the endpoint as needed: + +curl -X PUT \ + "http://localhost:9200/_ml/trained_models/.elser_model_2?wait_for_completion" \ + -H "Content-Type: application/json" \ + -d '{"input":{"field_names":["text_field"]}}' +curl -X POST \ + "http://localhost:9200/_ml/trained_models/.elser_model_2/deployment/_start?wait_for=fully_allocated" +curl -X PUT \ + "http://localhost:9200/_ingest/pipeline/elser_ingest_pipeline" \ + -H "Content-Type: application/json" \ + -d '{"processors":[{"foreach":{"field":"passages","processor":{"inference":{"model_id":".elser_model_2","input_output":[{"input_field":"_ingest._value.content","output_field":"_ingest._value.embedding"}]}}}}]}' + +To run the example: + +$ python sparse_vectors.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to regenerate it. + +The example dataset includes a selection of workplace documents. The +following are good example queries to try out with this dataset: + +$ python sparse_vectors.py "work from home" +$ python sparse_vectors.py "vacation time" +$ python sparse_vectors.py "can I bring a bird to work?" + +When the index is created, the documents are split into short passages, and for +each passage a sparse embedding is generated using Elastic's ELSER v2 model. +The documents that are returned as search results are those that have the +highest scored passages. Add `--show-inner-hits` to the command to see +individual passage results as well. 
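+ +Unlike the dense vector example in ``vectors.py``, this script does not compute the +embeddings itself: the ``clean()`` method below only splits each document into passages, +and the ``elser_ingest_pipeline`` configured as the index's ``default_pipeline`` fills in +each passage's ``embedding`` field at ingest time.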
+""" + +import argparse +import json +import os +from datetime import datetime +from typing import Any, Dict, List, Optional +from urllib.request import urlopen + +import nltk # type: ignore +from tqdm import tqdm + +from elasticsearch.dsl import ( + Document, + InnerDoc, + Keyword, + Q, + Search, + SparseVector, + connections, + mapped_field, +) + +DATASET_URL = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json" + +# initialize sentence tokenizer +nltk.download("punkt_tab", quiet=True) + + +class Passage(InnerDoc): + content: Optional[str] + embedding: Dict[str, float] = mapped_field(SparseVector(), init=False) + + +class WorkplaceDoc(Document): + class Index: + name = "workplace_documents_sparse" + settings = {"default_pipeline": "elser_ingest_pipeline"} + + name: str + summary: str + content: str + created: datetime + updated: Optional[datetime] + url: str = mapped_field(Keyword()) + category: str = mapped_field(Keyword()) + passages: List[Passage] = mapped_field(default=[]) + + _model: Any = None + + def clean(self) -> None: + # split the content into sentences + passages = nltk.sent_tokenize(self.content) + + # generate an embedding for each passage and save it as a nested document + for passage in passages: + self.passages.append(Passage(content=passage)) + + +def create() -> None: + + # create the index + WorkplaceDoc._index.delete(ignore_unavailable=True) + WorkplaceDoc.init() + + # download the data + dataset = json.loads(urlopen(DATASET_URL).read()) + + # import the dataset + for data in tqdm(dataset, desc="Indexing documents..."): + doc = WorkplaceDoc( + name=data["name"], + summary=data["summary"], + content=data["content"], + created=data.get("created_on"), + updated=data.get("updated_at"), + url=data["url"], + category=data["category"], + ) + doc.save() + + +def search(query: str) -> Search[WorkplaceDoc]: + return WorkplaceDoc.search()[:5].query( + "nested", + path="passages", + query=Q( + "text_expansion", + passages__content={ + "model_id": ".elser_model_2", + "model_text": query, + }, + ), + inner_hits={"size": 2}, + ) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Vector database with Elasticsearch") + parser.add_argument( + "--recreate-index", action="store_true", help="Recreate and populate the index" + ) + parser.add_argument( + "--show-inner-hits", + action="store_true", + help="Show results for individual passages", + ) + parser.add_argument("query", action="store", help="The search query") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + + # initiate the default connection to elasticsearch + connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + if args.recreate_index or not WorkplaceDoc._index.exists(): + create() + + results = search(args.query) + + for hit in results: + print( + f"Document: {hit.name} [Category: {hit.category}] [Score: {hit.meta.score}]" + ) + print(f"Summary: {hit.summary}") + if args.show_inner_hits: + for passage in hit.meta.inner_hits["passages"]: + print(f" - [Score: {passage.meta.score}] {passage.content!r}") + print("") + + # close the connection + connections.get_connection().close() + + +if __name__ == "__main__": + main() diff --git a/examples/dsl/vectors.py b/examples/dsl/vectors.py new file mode 100644 index 000000000..2567e2889 --- /dev/null +++ b/examples/dsl/vectors.py @@ -0,0 +1,186 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +# Vector database example + +Requirements: + +$ pip install nltk sentence_transformers tqdm elasticsearch-dsl + +To run the example: + +$ python vectors.py "text to search" + +The index will be created automatically if it does not exist. Add +`--recreate-index` to regenerate it. + +The example dataset includes a selection of workplace documents. The +following are good example queries to try out with this dataset: + +$ python vectors.py "work from home" +$ python vectors.py "vacation time" +$ python vectors.py "can I bring a bird to work?" + +When the index is created, the documents are split into short passages, and for +each passage an embedding is generated using the open source +"all-MiniLM-L6-v2" model. The documents that are returned as search results are +those that have the highest scored passages. Add `--show-inner-hits` to the +command to see individual passage results as well. +""" + +import argparse +import json +import os +from datetime import datetime +from typing import Any, List, Optional, cast +from urllib.request import urlopen + +import nltk # type: ignore +from sentence_transformers import SentenceTransformer +from tqdm import tqdm + +from elasticsearch.dsl import ( + DenseVector, + Document, + InnerDoc, + Keyword, + M, + Search, + connections, + mapped_field, +) + +DATASET_URL = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json" +MODEL_NAME = "all-MiniLM-L6-v2" + +# initialize sentence tokenizer +nltk.download("punkt_tab", quiet=True) + +# this will be the embedding model +embedding_model: Any = None + + +class Passage(InnerDoc): + content: str + embedding: List[float] = mapped_field(DenseVector()) + + +class WorkplaceDoc(Document): + class Index: + name = "workplace_documents" + + name: str + summary: str + content: str + created: datetime + updated: Optional[datetime] + url: str = mapped_field(Keyword(required=True)) + category: str = mapped_field(Keyword(required=True)) + passages: M[List[Passage]] = mapped_field(default=[]) + + @classmethod + def get_embedding(cls, input: str) -> List[float]: + global embedding_model + if embedding_model is None: + embedding_model = SentenceTransformer(MODEL_NAME) + return cast(List[float], list(embedding_model.encode(input))) + + def clean(self) -> None: + # split the content into sentences + passages = cast(List[str], nltk.sent_tokenize(self.content)) + + # generate an embedding for each passage and save it as a nested document + for passage in passages: + self.passages.append( + Passage(content=passage, embedding=self.get_embedding(passage)) + ) + + +def create() -> None: + # create the index + WorkplaceDoc._index.delete(ignore_unavailable=True) + WorkplaceDoc.init() + + # download the data + dataset = json.loads(urlopen(DATASET_URL).read()) + + # import the dataset + for data in 
tqdm(dataset, desc="Indexing documents..."): + doc = WorkplaceDoc( + name=data["name"], + summary=data["summary"], + content=data["content"], + created=data.get("created_on"), + updated=data.get("updated_at"), + url=data["url"], + category=data["category"], + ) + doc.save() + + +def search(query: str) -> Search[WorkplaceDoc]: + return WorkplaceDoc.search().knn( + field=WorkplaceDoc.passages.embedding, + k=5, + num_candidates=50, + query_vector=list(WorkplaceDoc.get_embedding(query)), + inner_hits={"size": 2}, + ) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Vector database with Elasticsearch") + parser.add_argument( + "--recreate-index", action="store_true", help="Recreate and populate the index" + ) + parser.add_argument( + "--show-inner-hits", + action="store_true", + help="Show results for individual passages", + ) + parser.add_argument("query", action="store", help="The search query") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + + # initiate the default connection to elasticsearch + connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + if args.recreate_index or not WorkplaceDoc._index.exists(): + create() + + results = search(args.query) + + for hit in results: + print( + f"Document: {hit.name} [Category: {hit.category}] [Score: {hit.meta.score}]" + ) + print(f"Summary: {hit.summary}") + if args.show_inner_hits: + for passage in hit.meta.inner_hits["passages"]: + print(f" - [Score: {passage.meta.score}] {passage.content!r}") + print("") + + # close the connection + connections.get_connection().close() + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index b5f03e1d0..54c0378f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,8 @@ keywords = [ dynamic = ["version"] dependencies = [ "elastic-transport>=8.15.1,<9", + "python-dateutil", + "typing-extensions", ] [project.optional-dependencies] @@ -74,6 +76,14 @@ dev = [ "pyarrow", "pandas", "mapbox-vector-tile", + "jinja2", + "nltk", + "sentence_transformers", + "tqdm", + "mypy", + "pyright", + "types-python-dateutil", + "types-tqdm", ] docs = [ "sphinx", @@ -109,7 +119,16 @@ packages = ["elasticsearch"] [tool.pytest.ini_options] junit_family = "legacy" xfail_strict = true -markers = "otel" +markers = [ + "otel", + "sync: mark a test as performing I/O without asyncio.", +] +filterwarnings = [ + "error", + "ignore:Legacy index templates are deprecated in favor of composable templates.:elasticsearch.exceptions.ElasticsearchWarning", + "ignore:datetime.datetime.utcfromtimestamp() is deprecated and scheduled for removal in a future version..*:DeprecationWarning", + "default:enable_cleanup_closed ignored.*:DeprecationWarning", +] [tool.isort] profile = "black" diff --git a/test_elasticsearch/test_dsl/__init__.py b/test_elasticsearch/test_dsl/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/_async/__init__.py b/test_elasticsearch/test_dsl/_async/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/_async/test_document.py b/test_elasticsearch/test_dsl/_async/test_document.py new file mode 100644 index 000000000..5fe2d326c --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_document.py @@ -0,0 +1,883 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# this file creates several documents using bad or no types because +# these are still supported and should be kept functional in spite +# of not having appropriate type hints. For that reason the comment +# below disables many mypy checks that fails as a result of this. 
+# mypy: disable-error-code="assignment, index, arg-type, call-arg, operator, comparison-overlap, attr-defined" + +import codecs +import ipaddress +import pickle +import sys +from datetime import datetime +from hashlib import md5 +from typing import Any, ClassVar, Dict, List, Optional + +import pytest +from pytest import raises + +from elasticsearch.dsl import ( + AsyncDocument, + Index, + InnerDoc, + M, + Mapping, + MetaField, + Range, + analyzer, + field, + mapped_field, + utils, +) +from elasticsearch.dsl.document_base import InstrumentedField +from elasticsearch.dsl.exceptions import IllegalOperation, ValidationException + + +class MyInner(InnerDoc): + old_field = field.Text() + + +class MyDoc(AsyncDocument): + title = field.Keyword() + name = field.Text() + created_at = field.Date() + inner = field.Object(MyInner) + + +class MySubDoc(MyDoc): + name = field.Keyword() + + class Index: + name = "default-index" + + +class MyDoc2(AsyncDocument): + extra = field.Long() + + +class MyMultiSubDoc(MyDoc2, MySubDoc): + pass + + +class Comment(InnerDoc): + title = field.Text() + tags = field.Keyword(multi=True) + + +class DocWithNested(AsyncDocument): + comments = field.Nested(Comment) + + class Index: + name = "test-doc-with-nested" + + +class SimpleCommit(AsyncDocument): + files = field.Text(multi=True) + + class Index: + name = "test-git" + + +class Secret(str): + pass + + +class SecretField(field.CustomField): + builtin_type = "text" + + def _serialize(self, data: Any) -> Any: + return codecs.encode(data, "rot_13") + + def _deserialize(self, data: Any) -> Any: + if isinstance(data, Secret): + return data + return Secret(codecs.decode(data, "rot_13")) + + +class SecretDoc(AsyncDocument): + title = SecretField(index="no") + + class Index: + name = "test-secret-doc" + + +class NestedSecret(AsyncDocument): + secrets = field.Nested(SecretDoc) + + class Index: + name = "test-nested-secret" + + +class OptionalObjectWithRequiredField(AsyncDocument): + comments = field.Nested(properties={"title": field.Keyword(required=True)}) + + class Index: + name = "test-required" + + +class Host(AsyncDocument): + ip = field.Ip() + + class Index: + name = "test-host" + + +def test_range_serializes_properly() -> None: + class D(AsyncDocument): + lr: Range[int] = field.LongRange() + + d = D(lr=Range(lt=42)) + assert 40 in d.lr + assert 47 not in d.lr + assert {"lr": {"lt": 42}} == d.to_dict() + + d = D(lr={"lt": 42}) + assert {"lr": {"lt": 42}} == d.to_dict() + + +def test_range_deserializes_properly() -> None: + class D(InnerDoc): + lr = field.LongRange() + + d = D.from_es({"lr": {"lt": 42}}, True) + assert isinstance(d.lr, Range) + assert 40 in d.lr + assert 47 not in d.lr + + +def test_resolve_nested() -> None: + nested, field = NestedSecret._index.resolve_nested("secrets.title") + assert nested == ["secrets"] + assert field is NestedSecret._doc_type.mapping["secrets"]["title"] + + +def test_conflicting_mapping_raises_error_in_index_to_dict() -> None: + class A(AsyncDocument): + name = field.Text() + + class B(AsyncDocument): + name = field.Keyword() + + i = Index("i") + i.document(A) + i.document(B) + + with raises(ValueError): + i.to_dict() + + +def test_ip_address_serializes_properly() -> None: + host = Host(ip=ipaddress.IPv4Address("10.0.0.1")) + + assert {"ip": "10.0.0.1"} == host.to_dict() + + +def test_matches_uses_index() -> None: + assert SimpleCommit._matches({"_index": "test-git"}) + assert not SimpleCommit._matches({"_index": "not-test-git"}) + + +def test_matches_with_no_name_always_matches() -> 
None: + class D(AsyncDocument): + pass + + assert D._matches({}) + assert D._matches({"_index": "whatever"}) + + +def test_matches_accepts_wildcards() -> None: + class MyDoc(AsyncDocument): + class Index: + name = "my-*" + + assert MyDoc._matches({"_index": "my-index"}) + assert not MyDoc._matches({"_index": "not-my-index"}) + + +def test_assigning_attrlist_to_field() -> None: + sc = SimpleCommit() + l = ["README", "README.rst"] + sc.files = utils.AttrList(l) + + assert sc.to_dict()["files"] is l + + +def test_optional_inner_objects_are_not_validated_if_missing() -> None: + d = OptionalObjectWithRequiredField() + + d.full_clean() + + +def test_custom_field() -> None: + s = SecretDoc(title=Secret("Hello")) + + assert {"title": "Uryyb"} == s.to_dict() + assert s.title == "Hello" + + s = SecretDoc.from_es({"_source": {"title": "Uryyb"}}) + assert s.title == "Hello" + assert isinstance(s.title, Secret) + + +def test_custom_field_mapping() -> None: + assert { + "properties": {"title": {"index": "no", "type": "text"}} + } == SecretDoc._doc_type.mapping.to_dict() + + +def test_custom_field_in_nested() -> None: + s = NestedSecret() + s.secrets.append(SecretDoc(title=Secret("Hello"))) + + assert {"secrets": [{"title": "Uryyb"}]} == s.to_dict() + assert s.secrets[0].title == "Hello" + + +def test_multi_works_after_doc_has_been_saved() -> None: + c = SimpleCommit() + c.full_clean() + c.files.append("setup.py") + + assert c.to_dict() == {"files": ["setup.py"]} + + +def test_multi_works_in_nested_after_doc_has_been_serialized() -> None: + # Issue #359 + c = DocWithNested(comments=[Comment(title="First!")]) + + assert [] == c.comments[0].tags + assert {"comments": [{"title": "First!"}]} == c.to_dict() + assert [] == c.comments[0].tags + + +def test_null_value_for_object() -> None: + d = MyDoc(inner=None) + + assert d.inner is None + + +def test_inherited_doc_types_can_override_index() -> None: + class MyDocDifferentIndex(MySubDoc): + class Index: + name = "not-default-index" + settings = {"number_of_replicas": 0} + aliases: Dict[str, Any] = {"a": {}} + analyzers = [analyzer("my_analizer", tokenizer="keyword")] + + assert MyDocDifferentIndex._index._name == "not-default-index" + assert MyDocDifferentIndex()._get_index() == "not-default-index" + assert MyDocDifferentIndex._index.to_dict() == { + "aliases": {"a": {}}, + "mappings": { + "properties": { + "created_at": {"type": "date"}, + "inner": { + "type": "object", + "properties": {"old_field": {"type": "text"}}, + }, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + } + }, + "settings": { + "analysis": { + "analyzer": {"my_analizer": {"tokenizer": "keyword", "type": "custom"}} + }, + "number_of_replicas": 0, + }, + } + + +def test_to_dict_with_meta() -> None: + d = MySubDoc(title="hello") + d.meta.routing = "some-parent" + + assert { + "_index": "default-index", + "_routing": "some-parent", + "_source": {"title": "hello"}, + } == d.to_dict(True) + + +def test_to_dict_with_meta_includes_custom_index() -> None: + d = MySubDoc(title="hello") + d.meta.index = "other-index" + + assert {"_index": "other-index", "_source": {"title": "hello"}} == d.to_dict(True) + + +def test_to_dict_without_skip_empty_will_include_empty_fields() -> None: + d = MySubDoc(tags=[], title=None, inner={}) + + assert {} == d.to_dict() + assert {"tags": [], "title": None, "inner": {}} == d.to_dict(skip_empty=False) + + +def test_attribute_can_be_removed() -> None: + d = MyDoc(title="hello") + + del d.title + assert "title" not in d._d_ + + +def 
test_doc_type_can_be_correctly_pickled() -> None: + d = DocWithNested( + title="Hello World!", comments=[Comment(title="hellp")], meta={"id": 42} + ) + s = pickle.dumps(d) + + d2 = pickle.loads(s) + + assert d2 == d + assert 42 == d2.meta.id + assert "Hello World!" == d2.title + assert [{"title": "hellp"}] == d2.comments + assert isinstance(d2.comments[0], Comment) + + +def test_meta_is_accessible_even_on_empty_doc() -> None: + d = MyDoc() + d.meta + + d = MyDoc(title="aaa") + d.meta + + +def test_meta_field_mapping() -> None: + class User(AsyncDocument): + username = field.Text() + + class Meta: + all = MetaField(enabled=False) + _index = MetaField(enabled=True) + dynamic = MetaField("strict") + dynamic_templates = MetaField([42]) + + assert { + "properties": {"username": {"type": "text"}}, + "_all": {"enabled": False}, + "_index": {"enabled": True}, + "dynamic": "strict", + "dynamic_templates": [42], + } == User._doc_type.mapping.to_dict() + + +def test_multi_value_fields() -> None: + class Blog(AsyncDocument): + tags = field.Keyword(multi=True) + + b = Blog() + assert [] == b.tags + b.tags.append("search") + b.tags.append("python") + assert ["search", "python"] == b.tags + + +def test_docs_with_properties() -> None: + class User(AsyncDocument): + pwd_hash: str = field.Text() + + def check_password(self, pwd: bytes) -> bool: + return md5(pwd).hexdigest() == self.pwd_hash + + @property + def password(self) -> None: + raise AttributeError("readonly") + + @password.setter + def password(self, pwd: bytes) -> None: + self.pwd_hash = md5(pwd).hexdigest() + + u = User(pwd_hash=md5(b"secret").hexdigest()) + assert u.check_password(b"secret") + assert not u.check_password(b"not-secret") + + u.password = b"not-secret" + assert "password" not in u._d_ + assert not u.check_password(b"secret") + assert u.check_password(b"not-secret") + + with raises(AttributeError): + u.password + + +def test_nested_can_be_assigned_to() -> None: + d1 = DocWithNested(comments=[Comment(title="First!")]) + d2 = DocWithNested() + + d2.comments = d1.comments + assert isinstance(d1.comments[0], Comment) + assert d2.comments == [{"title": "First!"}] + assert {"comments": [{"title": "First!"}]} == d2.to_dict() + assert isinstance(d2.comments[0], Comment) + + +def test_nested_can_be_none() -> None: + d = DocWithNested(comments=None, title="Hello World!") + + assert {"title": "Hello World!"} == d.to_dict() + + +def test_nested_defaults_to_list_and_can_be_updated() -> None: + md = DocWithNested() + + assert [] == md.comments + + md.comments.append({"title": "hello World!"}) + assert {"comments": [{"title": "hello World!"}]} == md.to_dict() + + +def test_to_dict_is_recursive_and_can_cope_with_multi_values() -> None: + md = MyDoc(name=["a", "b", "c"]) + md.inner = [MyInner(old_field="of1"), MyInner(old_field="of2")] + + assert isinstance(md.inner[0], MyInner) + + assert { + "name": ["a", "b", "c"], + "inner": [{"old_field": "of1"}, {"old_field": "of2"}], + } == md.to_dict() + + +def test_to_dict_ignores_empty_collections() -> None: + md = MySubDoc(name="", address={}, count=0, valid=False, tags=[]) + + assert {"name": "", "count": 0, "valid": False} == md.to_dict() + + +def test_declarative_mapping_definition() -> None: + assert issubclass(MyDoc, AsyncDocument) + assert hasattr(MyDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "text"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == 
MyDoc._doc_type.mapping.to_dict() + + +def test_you_can_supply_own_mapping_instance() -> None: + class MyD(AsyncDocument): + title = field.Text() + + class Meta: + mapping = Mapping() + mapping.meta("_all", enabled=False) + + assert { + "_all": {"enabled": False}, + "properties": {"title": {"type": "text"}}, + } == MyD._doc_type.mapping.to_dict() + + +def test_document_can_be_created_dynamically() -> None: + n = datetime.now() + md = MyDoc(title="hello") + md.name = "My Fancy Document!" + md.created_at = n + + inner = md.inner + # consistent returns + assert inner is md.inner + inner.old_field = "Already defined." + + md.inner.new_field = ["undefined", "field"] + + assert { + "title": "hello", + "name": "My Fancy Document!", + "created_at": n, + "inner": {"old_field": "Already defined.", "new_field": ["undefined", "field"]}, + } == md.to_dict() + + +def test_invalid_date_will_raise_exception() -> None: + md = MyDoc() + md.created_at = "not-a-date" + with raises(ValidationException): + md.full_clean() + + +def test_document_inheritance() -> None: + assert issubclass(MySubDoc, MyDoc) + assert issubclass(MySubDoc, AsyncDocument) + assert hasattr(MySubDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == MySubDoc._doc_type.mapping.to_dict() + + +def test_child_class_can_override_parent() -> None: + class A(AsyncDocument): + o = field.Object(dynamic=False, properties={"a": field.Text()}) + + class B(A): + o = field.Object(dynamic="strict", properties={"b": field.Text()}) + + assert { + "properties": { + "o": { + "dynamic": "strict", + "properties": {"a": {"type": "text"}, "b": {"type": "text"}}, + "type": "object", + } + } + } == B._doc_type.mapping.to_dict() + + +def test_meta_fields_are_stored_in_meta_and_ignored_by_to_dict() -> None: + md = MySubDoc(meta={"id": 42}, name="My First doc!") + + md.meta.index = "my-index" + assert md.meta.index == "my-index" + assert md.meta.id == 42 + assert {"name": "My First doc!"} == md.to_dict() + assert {"id": 42, "index": "my-index"} == md.meta.to_dict() + + +def test_index_inheritance() -> None: + assert issubclass(MyMultiSubDoc, MySubDoc) + assert issubclass(MyMultiSubDoc, MyDoc2) + assert issubclass(MyMultiSubDoc, AsyncDocument) + assert hasattr(MyMultiSubDoc, "_doc_type") + assert hasattr(MyMultiSubDoc, "_index") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + "extra": {"type": "long"}, + } + } == MyMultiSubDoc._doc_type.mapping.to_dict() + + +def test_meta_fields_can_be_set_directly_in_init() -> None: + p = object() + md = MyDoc(_id=p, title="Hello World!") + + assert md.meta.id is p + + +@pytest.mark.asyncio +async def test_save_no_index(async_mock_client: Any) -> None: + md = MyDoc() + with raises(ValidationException): + await md.save(using="mock") + + +@pytest.mark.asyncio +async def test_delete_no_index(async_mock_client: Any) -> None: + md = MyDoc() + with raises(ValidationException): + await md.delete(using="mock") + + +@pytest.mark.asyncio +async def test_update_no_fields() -> None: + md = MyDoc() + with raises(IllegalOperation): + await md.update() + + +def test_search_with_custom_alias_and_index() -> None: + search_object = MyDoc.search( + using="staging", index=["custom_index1", "custom_index2"] + ) + + assert 
search_object._using == "staging" + assert search_object._index == ["custom_index1", "custom_index2"] + + +def test_from_es_respects_underscored_non_meta_fields() -> None: + doc = { + "_index": "test-index", + "_id": "elasticsearch", + "_score": 12.0, + "fields": {"hello": "world", "_routing": "es", "_tags": ["search"]}, + "_source": { + "city": "Amsterdam", + "name": "Elasticsearch", + "_tagline": "You know, for search", + }, + } + + class Company(AsyncDocument): + class Index: + name = "test-company" + + c = Company.from_es(doc) + + assert c.meta.fields._tags == ["search"] + assert c.meta.fields._routing == "es" + assert c._tagline == "You know, for search" + + +def test_nested_and_object_inner_doc() -> None: + class MySubDocWithNested(MyDoc): + nested_inner = field.Nested(MyInner) + + props = MySubDocWithNested._doc_type.mapping.to_dict()["properties"] + assert props == { + "created_at": {"type": "date"}, + "inner": {"properties": {"old_field": {"type": "text"}}, "type": "object"}, + "name": {"type": "text"}, + "nested_inner": { + "properties": {"old_field": {"type": "text"}}, + "type": "nested", + }, + "title": {"type": "keyword"}, + } + + +def test_doc_with_type_hints() -> None: + class TypedInnerDoc(InnerDoc): + st: M[str] + dt: M[Optional[datetime]] + li: M[List[int]] + + class TypedDoc(AsyncDocument): + st: str + dt: Optional[datetime] + li: List[int] + ob: TypedInnerDoc + ns: List[TypedInnerDoc] + ip: Optional[str] = field.Ip() + k1: str = field.Keyword(required=True) + k2: M[str] = field.Keyword() + k3: str = mapped_field(field.Keyword(), default="foo") + k4: M[Optional[str]] = mapped_field(field.Keyword()) # type: ignore[misc] + s1: Secret = SecretField() + s2: M[Secret] = SecretField() + s3: Secret = mapped_field(SecretField()) # type: ignore[misc] + s4: M[Optional[Secret]] = mapped_field( + SecretField(), default_factory=lambda: "foo" + ) + i1: ClassVar + i2: ClassVar[int] + + props = TypedDoc._doc_type.mapping.to_dict()["properties"] + assert props == { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + "ob": { + "type": "object", + "properties": { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + }, + }, + "ns": { + "type": "nested", + "properties": { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + }, + }, + "ip": {"type": "ip"}, + "k1": {"type": "keyword"}, + "k2": {"type": "keyword"}, + "k3": {"type": "keyword"}, + "k4": {"type": "keyword"}, + "s1": {"type": "text"}, + "s2": {"type": "text"}, + "s3": {"type": "text"}, + "s4": {"type": "text"}, + } + + TypedDoc.i1 = "foo" + TypedDoc.i2 = 123 + + doc = TypedDoc() + assert doc.k3 == "foo" + assert doc.s4 == "foo" + with raises(ValidationException) as exc_info: + doc.full_clean() + assert set(exc_info.value.args[0].keys()) == { + "st", + "k1", + "k2", + "ob", + "s1", + "s2", + "s3", + } + + assert TypedDoc.i1 == "foo" + assert TypedDoc.i2 == 123 + + doc.st = "s" + doc.li = [1, 2, 3] + doc.k1 = "k1" + doc.k2 = "k2" + doc.ob.st = "s" + doc.ob.li = [1] + doc.s1 = "s1" + doc.s2 = "s2" + doc.s3 = "s3" + doc.full_clean() + + doc.ob = TypedInnerDoc(li=[1]) + with raises(ValidationException) as exc_info: + doc.full_clean() + assert set(exc_info.value.args[0].keys()) == {"ob"} + assert set(exc_info.value.args[0]["ob"][0].args[0].keys()) == {"st"} + + doc.ob.st = "s" + doc.ns.append(TypedInnerDoc(li=[1, 2])) + with raises(ValidationException) as exc_info: + doc.full_clean() + + doc.ns[0].st = "s" + doc.full_clean() + + doc.ip = "1.2.3.4" 
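+    # every required field is populated at this point; the to_dict() check below
+    # also confirms that the declared defaults ("k3" and "s4") are serialized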
+ n = datetime.now() + doc.dt = n + assert doc.to_dict() == { + "st": "s", + "li": [1, 2, 3], + "dt": n, + "ob": { + "st": "s", + "li": [1], + }, + "ns": [ + { + "st": "s", + "li": [1, 2], + } + ], + "ip": "1.2.3.4", + "k1": "k1", + "k2": "k2", + "k3": "foo", + "s1": "s1", + "s2": "s2", + "s3": "s3", + "s4": "foo", + } + + s = TypedDoc.search().sort(TypedDoc.st, -TypedDoc.dt, +TypedDoc.ob.st) + s.aggs.bucket("terms_agg", "terms", field=TypedDoc.k1) + assert s.to_dict() == { + "aggs": {"terms_agg": {"terms": {"field": "k1"}}}, + "sort": ["st", {"dt": {"order": "desc"}}, "ob.st"], + } + + +@pytest.mark.skipif(sys.version_info < (3, 10), reason="requires Python 3.10") +def test_doc_with_pipe_type_hints() -> None: + with pytest.raises(TypeError): + + class BadlyTypedDoc(AsyncDocument): + s: str + f: str | int | None # type: ignore[syntax] + + class TypedDoc(AsyncDocument): + s: str + f1: str | None # type: ignore[syntax] + f2: M[int | None] # type: ignore[syntax] + f3: M[datetime | None] # type: ignore[syntax] + + props = TypedDoc._doc_type.mapping.to_dict()["properties"] + assert props == { + "s": {"type": "text"}, + "f1": {"type": "text"}, + "f2": {"type": "integer"}, + "f3": {"type": "date"}, + } + + doc = TypedDoc() + with raises(ValidationException) as exc_info: + doc.full_clean() + assert set(exc_info.value.args[0].keys()) == {"s"} + doc.s = "s" + doc.full_clean() + + +def test_instrumented_field() -> None: + class Child(InnerDoc): + st: M[str] + + class Doc(AsyncDocument): + st: str + ob: Child + ns: List[Child] + + doc = Doc( + st="foo", + ob=Child(st="bar"), + ns=[ + Child(st="baz"), + Child(st="qux"), + ], + ) + + assert type(doc.st) is str + assert doc.st == "foo" + + assert type(doc.ob) is Child + assert doc.ob.st == "bar" + + assert type(doc.ns) is utils.AttrList + assert doc.ns[0].st == "baz" + assert doc.ns[1].st == "qux" + assert type(doc.ns[0]) is Child + assert type(doc.ns[1]) is Child + + assert type(Doc.st) is InstrumentedField + assert str(Doc.st) == "st" + assert +Doc.st == "st" + assert -Doc.st == "-st" + assert Doc.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.st.something + + assert type(Doc.ob) is InstrumentedField + assert str(Doc.ob) == "ob" + assert str(Doc.ob.st) == "ob.st" + assert +Doc.ob.st == "ob.st" + assert -Doc.ob.st == "-ob.st" + assert Doc.ob.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.ob.something + with raises(AttributeError): + Doc.ob.st.something + + assert type(Doc.ns) is InstrumentedField + assert str(Doc.ns) == "ns" + assert str(Doc.ns.st) == "ns.st" + assert +Doc.ns.st == "ns.st" + assert -Doc.ns.st == "-ns.st" + assert Doc.ns.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.ns.something + with raises(AttributeError): + Doc.ns.st.something diff --git a/test_elasticsearch/test_dsl/_async/test_faceted_search.py b/test_elasticsearch/test_dsl/_async/test_faceted_search.py new file mode 100644 index 000000000..e3bd30850 --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_faceted_search.py @@ -0,0 +1,201 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +import pytest + +from elasticsearch.dsl.faceted_search import ( + AsyncFacetedSearch, + DateHistogramFacet, + TermsFacet, +) + + +class BlogSearch(AsyncFacetedSearch): + doc_types = ["user", "post"] + fields = [ + "title^5", + "body", + ] + + facets = { + "category": TermsFacet(field="category.raw"), + "tags": TermsFacet(field="tags"), + } + + +def test_query_is_created_properly() -> None: + bs = BlogSearch("python search") + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +def test_query_is_created_properly_with_sort_tuple() -> None: + bs = BlogSearch("python search", sort=("category", "-title")) + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + "sort": ["category", {"title": {"order": "desc"}}], + } == s.to_dict() + + +def test_filter_is_applied_to_search_but_not_relevant_facet() -> None: + bs = BlogSearch("python search", filters={"category": "elastic"}) + s = bs.build_search() + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["elastic"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "post_filter": {"terms": {"category.raw": ["elastic"]}}, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +def test_filters_are_applied_to_search_ant_relevant_facets() -> None: + bs = BlogSearch( + "python search", filters={"category": "elastic", "tags": ["python", "django"]} + ) + s = bs.build_search() + + d = s.to_dict() + + # we need to test post_filter without relying on order + f = d["post_filter"]["bool"].pop("must") + assert len(f) == 2 + assert {"terms": {"category.raw": ["elastic"]}} in f + assert {"terms": {"tags": ["python", "django"]}} in f + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["elastic"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"terms": {"tags": ["python", "django"]}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": 
"python search"} + }, + "post_filter": {"bool": {}}, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == d + + +def test_date_histogram_facet_with_1970_01_01_date() -> None: + dhf = DateHistogramFacet() + assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0) # type: ignore[arg-type] + assert dhf.get_value({"key": 0}) == datetime(1970, 1, 1, 0, 0) # type: ignore[arg-type] + + +@pytest.mark.parametrize( + ["interval_type", "interval"], + [ + ("interval", "year"), + ("calendar_interval", "year"), + ("interval", "month"), + ("calendar_interval", "month"), + ("interval", "week"), + ("calendar_interval", "week"), + ("interval", "day"), + ("calendar_interval", "day"), + ("fixed_interval", "day"), + ("interval", "hour"), + ("fixed_interval", "hour"), + ("interval", "1Y"), + ("calendar_interval", "1Y"), + ("interval", "1M"), + ("calendar_interval", "1M"), + ("interval", "1w"), + ("calendar_interval", "1w"), + ("interval", "1d"), + ("calendar_interval", "1d"), + ("fixed_interval", "1d"), + ("interval", "1h"), + ("fixed_interval", "1h"), + ], +) +def test_date_histogram_interval_types(interval_type: str, interval: str) -> None: + dhf = DateHistogramFacet(field="@timestamp", **{interval_type: interval}) + assert dhf.get_aggregation().to_dict() == { + "date_histogram": { + "field": "@timestamp", + interval_type: interval, + "min_doc_count": 0, + } + } + dhf.get_value_filter(datetime.now()) + + +def test_date_histogram_no_interval_keyerror() -> None: + dhf = DateHistogramFacet(field="@timestamp") + with pytest.raises(KeyError) as e: + dhf.get_value_filter(datetime.now()) + assert str(e.value) == "'interval'" + + +def test_params_added_to_search() -> None: + bs = BlogSearch("python search") + assert bs._s._params == {} + bs.params(routing="42") + assert bs._s._params == {"routing": "42"} diff --git a/test_elasticsearch/test_dsl/_async/test_index.py b/test_elasticsearch/test_dsl/_async/test_index.py new file mode 100644 index 000000000..624bab79a --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_index.py @@ -0,0 +1,197 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import string +from random import choice +from typing import Any, Dict + +import pytest +from pytest import raises + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncIndex, + AsyncIndexTemplate, + Date, + Text, + analyzer, +) + + +class Post(AsyncDocument): + title = Text() + published_from = Date() + + +def test_multiple_doc_types_will_combine_mappings() -> None: + class User(AsyncDocument): + username = Text() + + i = AsyncIndex("i") + i.document(Post) + i.document(User) + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "username": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +def test_search_is_limited_to_index_name() -> None: + i = AsyncIndex("my-index") + s = i.search() + + assert s._index == ["my-index"] + + +def test_cloned_index_has_copied_settings_and_using() -> None: + client = object() + i = AsyncIndex("my-index", using=client) # type: ignore[arg-type] + i.settings(number_of_shards=1) + + i2 = i.clone("my-other-index") + + assert "my-other-index" == i2._name + assert client is i2._using + assert i._settings == i2._settings + assert i._settings is not i2._settings + + +def test_cloned_index_has_analysis_attribute() -> None: + """ + Regression test for Issue #582 in which `AsyncIndex.clone()` was not copying + over the `_analysis` attribute. + """ + client = object() + i = AsyncIndex("my-index", using=client) # type: ignore[arg-type] + + random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100)) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + i.analyzer(random_analyzer) + + i2 = i.clone("my-clone-index") + + assert i.to_dict()["settings"]["analysis"] == i2.to_dict()["settings"]["analysis"] + + +def test_settings_are_saved() -> None: + i = AsyncIndex("i") + i.settings(number_of_replicas=0) + i.settings(number_of_shards=1) + + assert {"settings": {"number_of_shards": 1, "number_of_replicas": 0}} == i.to_dict() + + +def test_registered_doc_type_included_in_to_dict() -> None: + i = AsyncIndex("i", using="alias") + i.document(Post) + + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +def test_registered_doc_type_included_in_search() -> None: + i = AsyncIndex("i", using="alias") + i.document(Post) + + s = i.search() + + assert s._doc_type == [Post] + + +def test_aliases_add_to_object() -> None: + random_alias = "".join(choice(string.ascii_letters) for _ in range(100)) + alias_dict: Dict[str, Any] = {random_alias: {}} + + index = AsyncIndex("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == alias_dict + + +def test_aliases_returned_from_to_dict() -> None: + random_alias = "".join(choice(string.ascii_letters) for _ in range(100)) + alias_dict: Dict[str, Any] = {random_alias: {}} + + index = AsyncIndex("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == index.to_dict()["aliases"] == alias_dict + + +def test_analyzers_added_to_object() -> None: + random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100)) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + index = AsyncIndex("i", using="alias") + index.analyzer(random_analyzer) + + assert index._analysis["analyzer"][random_analyzer_name] == { + "filter": ["standard"], + "type": "custom", + "tokenizer": "standard", + } + + +def test_analyzers_returned_from_to_dict() -> None: 
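+    # the analyzer registered on the index should show up under
+    # settings.analysis.analyzer in the generated dict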
+ random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100)) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + index = AsyncIndex("i", using="alias") + index.analyzer(random_analyzer) + + assert index.to_dict()["settings"]["analysis"]["analyzer"][ + random_analyzer_name + ] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"} + + +def test_conflicting_analyzer_raises_error() -> None: + i = AsyncIndex("i") + i.analyzer("my_analyzer", tokenizer="whitespace", filter=["lowercase", "stop"]) + + with raises(ValueError): + i.analyzer("my_analyzer", tokenizer="keyword", filter=["lowercase", "stop"]) + + +def test_index_template_can_have_order() -> None: + i = AsyncIndex("i-*") + it = i.as_template("i", order=2) + + assert {"index_patterns": ["i-*"], "order": 2} == it.to_dict() + + +@pytest.mark.asyncio +async def test_index_template_save_result(async_mock_client: Any) -> None: + it = AsyncIndexTemplate("test-template", "test-*") + + assert await it.save(using="mock") == await async_mock_client.indices.put_template() diff --git a/test_elasticsearch/test_dsl/_async/test_mapping.py b/test_elasticsearch/test_dsl/_async/test_mapping.py new file mode 100644 index 000000000..93da49fae --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_mapping.py @@ -0,0 +1,222 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
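+
+# The tests below exercise AsyncMapping directly: adding fields, recursive
+# updates, and collecting analyzers/normalizers into the analysis definition.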
+ +import json + +from elasticsearch.dsl import AsyncMapping, Keyword, Nested, Text, analysis + + +def test_mapping_can_has_fields() -> None: + m = AsyncMapping() + m.field("name", "text").field("tags", "keyword") + + assert { + "properties": {"name": {"type": "text"}, "tags": {"type": "keyword"}} + } == m.to_dict() + + +def test_mapping_update_is_recursive() -> None: + m1 = AsyncMapping() + m1.field("title", "text") + m1.field("author", "object") + m1.field("author", "object", properties={"name": {"type": "text"}}) + m1.meta("_all", enabled=False) + m1.meta("dynamic", False) + + m2 = AsyncMapping() + m2.field("published_from", "date") + m2.field("author", "object", properties={"email": {"type": "text"}}) + m2.field("title", "text") + m2.field("lang", "keyword") + m2.meta("_analyzer", path="lang") + + m1.update(m2, update_only=True) + + assert { + "_all": {"enabled": False}, + "_analyzer": {"path": "lang"}, + "dynamic": False, + "properties": { + "published_from": {"type": "date"}, + "title": {"type": "text"}, + "lang": {"type": "keyword"}, + "author": { + "type": "object", + "properties": {"name": {"type": "text"}, "email": {"type": "text"}}, + }, + }, + } == m1.to_dict() + + +def test_properties_can_iterate_over_all_the_fields() -> None: + m = AsyncMapping() + m.field("f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")}) + m.field("f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")})) + + assert {"f1", "f2", "f3", "f4"} == { + f.test_attr for f in m.properties._collect_fields() + } + + +def test_mapping_can_collect_all_analyzers_and_normalizers() -> None: + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer("english") + a3 = analysis.analyzer("unknown_custom") + a4 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + a5 = analysis.analyzer("my_analyzer3", tokenizer="keyword") + n1 = analysis.normalizer("my_normalizer1", filter=["lowercase"]) + n2 = analysis.normalizer( + "my_normalizer2", + filter=[ + "my_filter1", + "my_filter2", + analysis.token_filter("my_filter3", "stop", stopwords=["e", "f"]), + ], + ) + n3 = analysis.normalizer("unknown_custom") + + m = AsyncMapping() + m.field( + "title", + "text", + analyzer=a1, + fields={"english": Text(analyzer=a2), "unknown": Keyword(search_analyzer=a3)}, + ) + m.field("comments", Nested(properties={"author": Text(analyzer=a4)})) + m.field("normalized_title", "keyword", normalizer=n1) + m.field("normalized_comment", "keyword", normalizer=n2) + m.field("unknown", "keyword", normalizer=n3) + m.meta("_all", analyzer=a5) + + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + "my_analyzer3": {"tokenizer": "keyword", "type": "custom"}, + }, + "normalizer": { + "my_normalizer1": {"filter": ["lowercase"], "type": "custom"}, + "my_normalizer2": { + "filter": ["my_filter1", "my_filter2", "my_filter3"], + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, + "my_filter3": {"stopwords": ["e", "f"], "type": "stop"}, + }, + "tokenizer": {"trigram": 
{"max_gram": 3, "min_gram": 3, "type": "nGram"}}, + } == m._collect_analysis() + + assert json.loads(json.dumps(m.to_dict())) == m.to_dict() + + +def test_mapping_can_collect_multiple_analyzers() -> None: + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + m = AsyncMapping() + m.field("title", "text", analyzer=a1, search_analyzer=a2) + m.field( + "text", + "text", + analyzer=a1, + fields={ + "english": Text(analyzer=a1), + "unknown": Keyword(analyzer=a1, search_analyzer=a2), + }, + ) + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, + }, + "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}}, + } == m._collect_analysis() + + +def test_even_non_custom_analyzers_can_have_params() -> None: + a1 = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+") + m = AsyncMapping() + m.field("title", "text", analyzer=a1) + + assert { + "analyzer": {"whitespace": {"type": "pattern", "pattern": r"\\s+"}} + } == m._collect_analysis() + + +def test_resolve_field_can_resolve_multifields() -> None: + m = AsyncMapping() + m.field("title", "text", fields={"keyword": Keyword()}) + + assert isinstance(m.resolve_field("title.keyword"), Keyword) + + +def test_resolve_nested() -> None: + m = AsyncMapping() + m.field("n1", "nested", properties={"n2": Nested(properties={"k1": Keyword()})}) + m.field("k2", "keyword") + + nested, field = m.resolve_nested("n1.n2.k1") + assert nested == ["n1", "n1.n2"] + assert isinstance(field, Keyword) + + nested, field = m.resolve_nested("k2") + assert nested == [] + assert isinstance(field, Keyword) diff --git a/test_elasticsearch/test_dsl/_async/test_search.py b/test_elasticsearch/test_dsl/_async/test_search.py new file mode 100644 index 000000000..a00ddf448 --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_search.py @@ -0,0 +1,841 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from copy import deepcopy +from typing import Any + +import pytest +from pytest import raises + +from elasticsearch.dsl import ( + AsyncEmptySearch, + AsyncSearch, + Document, + Q, + query, + types, + wrappers, +) +from elasticsearch.dsl.exceptions import IllegalOperation + + +def test_expand__to_dot_is_respected() -> None: + s = AsyncSearch().query("match", a__b=42, _expand__to_dot=False) + + assert {"query": {"match": {"a__b": 42}}} == s.to_dict() + + +@pytest.mark.asyncio +async def test_execute_uses_cache() -> None: + s = AsyncSearch() + r = object() + s._response = r # type: ignore[assignment] + + assert r is await s.execute() + + +@pytest.mark.asyncio +async def test_cache_can_be_ignored(async_mock_client: Any) -> None: + s = AsyncSearch(using="mock") + r = object() + s._response = r # type: ignore[assignment] + await s.execute(ignore_cache=True) + + async_mock_client.search.assert_awaited_once_with(index=None, body={}) + + +@pytest.mark.asyncio +async def test_iter_iterates_over_hits() -> None: + s = AsyncSearch() + s._response = [1, 2, 3] # type: ignore[assignment] + + assert [1, 2, 3] == [hit async for hit in s] + + +def test_cache_isnt_cloned() -> None: + s = AsyncSearch() + s._response = object() # type: ignore[assignment] + + assert not hasattr(s._clone(), "_response") + + +def test_search_starts_with_no_query() -> None: + s = AsyncSearch() + + assert s.query._proxied is None + + +def test_search_query_combines_query() -> None: + s = AsyncSearch() + + s2 = s.query("match", f=42) + assert s2.query._proxied == query.Match(f=42) + assert s.query._proxied is None + + s3 = s2.query("match", f=43) + assert s2.query._proxied == query.Match(f=42) + assert s3.query._proxied == query.Bool(must=[query.Match(f=42), query.Match(f=43)]) + + +def test_query_can_be_assigned_to() -> None: + s = AsyncSearch() + + q = Q("match", title="python") + s.query = q # type: ignore + + assert s.query._proxied is q + + +def test_query_can_be_wrapped() -> None: + s = AsyncSearch().query("match", title="python") + + s.query = Q("function_score", query=s.query, field_value_factor={"field": "rating"}) # type: ignore + + assert { + "query": { + "function_score": { + "functions": [{"field_value_factor": {"field": "rating"}}], + "query": {"match": {"title": "python"}}, + } + } + } == s.to_dict() + + +def test_using() -> None: + o = object() + o2 = object() + s = AsyncSearch(using=o) + assert s._using is o + s2 = s.using(o2) # type: ignore[arg-type] + assert s._using is o + assert s2._using is o2 + + +def test_methods_are_proxied_to_the_query() -> None: + s = AsyncSearch().query("match_all") + + assert s.query.to_dict() == {"match_all": {}} + + +def test_query_always_returns_search() -> None: + s = AsyncSearch() + + assert isinstance(s.query("match", f=42), AsyncSearch) + + +def test_source_copied_on_clone() -> None: + s = AsyncSearch().source(False) + assert s._clone()._source == s._source + assert s._clone()._source is False + + s2 = AsyncSearch().source([]) + assert s2._clone()._source == s2._source + assert s2._source == [] + + s3 = AsyncSearch().source(["some", "fields"]) + assert s3._clone()._source == s3._source + assert s3._clone()._source == ["some", "fields"] + + +def test_copy_clones() -> None: + from copy import copy + + s1 = AsyncSearch().source(["some", "fields"]) + s2 = copy(s1) + + assert s1 == s2 + assert s1 is not s2 + + +def test_aggs_allow_two_metric() -> None: + s = AsyncSearch() + + s.aggs.metric("a", "max", field="a").metric("b", "max", field="b") + + assert s.to_dict() == { + "aggs": 
{"a": {"max": {"field": "a"}}, "b": {"max": {"field": "b"}}} + } + + +def test_aggs_get_copied_on_change() -> None: + s = AsyncSearch().query("match_all") + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + + s2 = s.query("match_all") + s2.aggs.bucket("per_month", "date_histogram", field="date", interval="month") + s3 = s2.query("match_all") + s3.aggs["per_month"].metric("max_score", "max", field="score") + s4 = s3._clone() + s4.aggs.metric("max_score", "max", field="score") + + d: Any = { + "query": {"match_all": {}}, + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + } + + assert d == s.to_dict() + d["aggs"]["per_month"] = {"date_histogram": {"field": "date", "interval": "month"}} + assert d == s2.to_dict() + d["aggs"]["per_month"]["aggs"] = {"max_score": {"max": {"field": "score"}}} + assert d == s3.to_dict() + d["aggs"]["max_score"] = {"max": {"field": "score"}} + assert d == s4.to_dict() + + +def test_search_index() -> None: + s = AsyncSearch(index="i") + assert s._index == ["i"] + s = s.index("i2") + assert s._index == ["i", "i2"] + s = s.index("i3") + assert s._index == ["i", "i2", "i3"] + s = s.index() + assert s._index is None + s = AsyncSearch(index=("i", "i2")) + assert s._index == ["i", "i2"] + s = AsyncSearch(index=["i", "i2"]) + assert s._index == ["i", "i2"] + s = AsyncSearch() + s = s.index("i", "i2") + assert s._index == ["i", "i2"] + s2 = s.index("i3") + assert s._index == ["i", "i2"] + assert s2._index == ["i", "i2", "i3"] + s = AsyncSearch() + s = s.index(["i", "i2"], "i3") + assert s._index == ["i", "i2", "i3"] + s2 = s.index("i4") + assert s._index == ["i", "i2", "i3"] + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(["i4"]) + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(("i4", "i5")) + assert s2._index == ["i", "i2", "i3", "i4", "i5"] + + +def test_doc_type_document_class() -> None: + class MyDocument(Document): + pass + + s = AsyncSearch(doc_type=MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + s = AsyncSearch().doc_type(MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + +def test_knn() -> None: + s = AsyncSearch() + + with raises(TypeError): + s.knn() # type: ignore[call-arg] + with raises(TypeError): + s.knn("field") # type: ignore[call-arg] + with raises(TypeError): + s.knn("field", 5) # type: ignore[call-arg] + with raises(ValueError): + s.knn("field", 5, 100) + with raises(ValueError): + s.knn("field", 5, 100, query_vector=[1, 2, 3], query_vector_builder={}) + + s = s.knn("field", 5, 100, query_vector=[1, 2, 3]) + assert { + "knn": { + "field": "field", + "k": 5, + "num_candidates": 100, + "query_vector": [1, 2, 3], + } + } == s.to_dict() + + s = s.knn( + k=4, + num_candidates=40, + boost=0.8, + field="name", + query_vector_builder={ + "text_embedding": {"model_id": "foo", "model_text": "search text"} + }, + inner_hits={"size": 1}, + ) + assert { + "knn": [ + { + "field": "field", + "k": 5, + "num_candidates": 100, + "query_vector": [1, 2, 3], + }, + { + "field": "name", + "k": 4, + "num_candidates": 40, + "query_vector_builder": { + "text_embedding": {"model_id": "foo", "model_text": "search text"} + }, + "boost": 0.8, + "inner_hits": {"size": 1}, + }, + ] + } == s.to_dict() + + +def test_rank() -> None: + s = AsyncSearch() + s.rank(rrf=False) + assert {} == s.to_dict() + + s = s.rank(rrf=True) + assert {"rank": {"rrf": {}}} == s.to_dict() + + s = 
s.rank(rrf={"window_size": 50, "rank_constant": 20}) + assert {"rank": {"rrf": {"window_size": 50, "rank_constant": 20}}} == s.to_dict() + + +def test_sort() -> None: + s = AsyncSearch() + s = s.sort("fielda", "-fieldb") + + assert ["fielda", {"fieldb": {"order": "desc"}}] == s._sort + assert {"sort": ["fielda", {"fieldb": {"order": "desc"}}]} == s.to_dict() + + s = s.sort() + assert [] == s._sort + assert AsyncSearch().to_dict() == s.to_dict() + + +def test_sort_by_score() -> None: + s = AsyncSearch() + s = s.sort("_score") + assert {"sort": ["_score"]} == s.to_dict() + + s = AsyncSearch() + with raises(IllegalOperation): + s.sort("-_score") + + +def test_collapse() -> None: + s = AsyncSearch() + + inner_hits = {"name": "most_recent", "size": 5, "sort": [{"@timestamp": "desc"}]} + s = s.collapse("user.id", inner_hits=inner_hits, max_concurrent_group_searches=4) + + assert { + "field": "user.id", + "inner_hits": { + "name": "most_recent", + "size": 5, + "sort": [{"@timestamp": "desc"}], + }, + "max_concurrent_group_searches": 4, + } == s._collapse + assert { + "collapse": { + "field": "user.id", + "inner_hits": { + "name": "most_recent", + "size": 5, + "sort": [{"@timestamp": "desc"}], + }, + "max_concurrent_group_searches": 4, + } + } == s.to_dict() + + s = s.collapse() + assert {} == s._collapse + assert AsyncSearch().to_dict() == s.to_dict() + + +def test_slice() -> None: + s = AsyncSearch() + assert {"from": 3, "size": 7} == s[3:10].to_dict() + assert {"size": 5} == s[:5].to_dict() + assert {"from": 3} == s[3:].to_dict() + assert {"from": 0, "size": 0} == s[0:0].to_dict() + assert {"from": 20, "size": 0} == s[20:0].to_dict() + assert {"from": 10, "size": 5} == s[10:][:5].to_dict() + assert {"from": 10, "size": 0} == s[:5][10:].to_dict() + assert {"size": 10} == s[:10][:40].to_dict() + assert {"size": 10} == s[:40][:10].to_dict() + assert {"size": 40} == s[:40][:80].to_dict() + assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() + assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() + assert {} == s[:].to_dict() + with raises(ValueError): + s[-1:] + with raises(ValueError): + s[4:-1] + with raises(ValueError): + s[-3:-2] + + +def test_index() -> None: + s = AsyncSearch() + assert {"from": 3, "size": 1} == s[3].to_dict() + assert {"from": 3, "size": 1} == s[3][0].to_dict() + assert {"from": 8, "size": 0} == s[3][5].to_dict() + assert {"from": 4, "size": 1} == s[3:10][1].to_dict() + with raises(ValueError): + s[-3] + + +def test_search_to_dict() -> None: + s = AsyncSearch() + assert {} == s.to_dict() + + s = s.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == s.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == s.to_dict(size=10) + + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + d = { + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + "query": {"match": {"f": 42}}, + } + assert d == s.to_dict() + + s = AsyncSearch(extra={"size": 5}) + assert {"size": 5} == s.to_dict() + s = s.extra(from_=42) + assert {"size": 5, "from": 42} == s.to_dict() + + +def test_complex_example() -> None: + s = AsyncSearch() + s = ( + s.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .collapse("user_id") + .post_filter("terms", tags=["prague", "czech"]) + .script_fields(more_attendees="doc['attendees'].value + 42") + ) + + s.aggs.bucket("per_country", "terms", 
field="country").metric( + "avg_attendees", "avg", field="attendees" + ) + + s.query.minimum_should_match = 2 + + s = s.highlight_options(order="score").highlight("title", "body", fragment_size=50) + + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "post_filter": {"terms": {"tags": ["prague", "czech"]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "collapse": {"field": "user_id"}, + "highlight": { + "order": "score", + "fields": {"title": {"fragment_size": 50}, "body": {"fragment_size": 50}}, + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } == s.to_dict() + + +def test_reverse() -> None: + d = { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [ + { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + } + ], + } + }, + "post_filter": {"bool": {"must": [{"terms": {"tags": ["prague", "czech"]}}]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "sort": ["title", {"category": {"order": "desc"}}, "_score"], + "size": 5, + "highlight": {"order": "score", "fields": {"title": {"fragment_size": 50}}}, + "suggest": { + "my-title-suggestions-1": { + "text": "devloping distibutd saerch engies", + "term": {"size": 3, "field": "title"}, + } + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } + + d2 = deepcopy(d) + + s = AsyncSearch.from_dict(d) + + # make sure we haven't modified anything in place + assert d == d2 + assert {"size": 5} == s._extra + assert d == s.to_dict() + + +def test_code_generated_classes() -> None: + s = AsyncSearch() + s = ( + s.query(query.Match("title", types.MatchQuery(query="python"))) + .query(~query.Match("title", types.MatchQuery(query="ruby"))) + .query( + query.Knn( + field="title", + query_vector=[1.0, 2.0, 3.0], + num_candidates=10, + k=3, + filter=query.Range("year", wrappers.Range(gt="2004")), + ) + ) + .filter( + query.Term("category", types.TermQuery(value="meetup")) + | query.Term("category", types.TermQuery(value="conference")) + ) + .collapse("user_id") + .post_filter(query.Terms(tags=["prague", "czech"])) + .script_fields(more_attendees="doc['attendees'].value + 42") + ) + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": {"value": "meetup"}}}, + {"term": {"category": {"value": "conference"}}}, + ] + } + } + ], + "must": [ + {"match": {"title": {"query": "python"}}}, + { + "knn": { + "field": "title", + "filter": [ + { + "range": { + "year": { + "gt": "2004", + }, + }, + }, + ], + "k": 3, + "num_candidates": 10, + "query_vector": [ + 1.0, + 2.0, + 3.0, + ], + }, + }, + ], + "must_not": [{"match": {"title": {"query": "ruby"}}}], + } + }, + "post_filter": {"terms": {"tags": ["prague", "czech"]}}, + "collapse": {"field": "user_id"}, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } == s.to_dict() + + +def test_from_dict_doesnt_need_query() -> None: + s = 
AsyncSearch.from_dict({"size": 5}) + + assert {"size": 5} == s.to_dict() + + +@pytest.mark.asyncio +async def test_params_being_passed_to_search(async_mock_client: Any) -> None: + s = AsyncSearch(using="mock") + s = s.params(routing="42") + await s.execute() + + async_mock_client.search.assert_awaited_once_with(index=None, body={}, routing="42") + + +def test_source() -> None: + assert {} == AsyncSearch().source().to_dict() + + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]} + } == AsyncSearch().source(includes=["foo.bar.*"], excludes=("foo.one",)).to_dict() + + assert {"_source": False} == AsyncSearch().source(False).to_dict() + + assert {"_source": ["f1", "f2"]} == AsyncSearch().source( + includes=["foo.bar.*"], excludes=["foo.one"] + ).source(["f1", "f2"]).to_dict() + + +def test_source_on_clone() -> None: + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]}, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == AsyncSearch().source(includes=["foo.bar.*"]).source( + excludes=["foo.one"] + ).filter( + "term", title="python" + ).to_dict() + assert { + "_source": False, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == AsyncSearch().source(False).filter("term", title="python").to_dict() + + +def test_source_on_clear() -> None: + assert ( + {} + == AsyncSearch() + .source(includes=["foo.bar.*"]) + .source(includes=None, excludes=None) + .to_dict() + ) + + +def test_suggest_accepts_global_text() -> None: + s = AsyncSearch.from_dict( + { + "suggest": { + "text": "the amsterdma meetpu", + "my-suggest-1": {"term": {"field": "title"}}, + "my-suggest-2": {"text": "other", "term": {"field": "body"}}, + } + } + ) + + assert { + "suggest": { + "my-suggest-1": { + "term": {"field": "title"}, + "text": "the amsterdma meetpu", + }, + "my-suggest-2": {"term": {"field": "body"}, "text": "other"}, + } + } == s.to_dict() + + +def test_suggest() -> None: + s = AsyncSearch() + s = s.suggest("my_suggestion", "pyhton", term={"field": "title"}) + + assert { + "suggest": {"my_suggestion": {"term": {"field": "title"}, "text": "pyhton"}} + } == s.to_dict() + + +def test_exclude() -> None: + s = AsyncSearch() + s = s.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == s.to_dict() + + +@pytest.mark.asyncio +async def test_delete_by_query(async_mock_client: Any) -> None: + s = AsyncSearch(using="mock", index="i").query("match", lang="java") + await s.delete() + + async_mock_client.delete_by_query.assert_awaited_once_with( + index=["i"], body={"query": {"match": {"lang": "java"}}} + ) + + +def test_update_from_dict() -> None: + s = AsyncSearch() + s.update_from_dict({"indices_boost": [{"important-documents": 2}]}) + s.update_from_dict({"_source": ["id", "name"]}) + s.update_from_dict({"collapse": {"field": "user_id"}}) + + assert { + "indices_boost": [{"important-documents": 2}], + "_source": ["id", "name"], + "collapse": {"field": "user_id"}, + } == s.to_dict() + + +def test_rescore_query_to_dict() -> None: + s = AsyncSearch(index="index-name") + + positive_query = Q( + "function_score", + query=Q("term", tags="a"), + script_score={"script": "_score * 1"}, + ) + + negative_query = Q( + "function_score", + query=Q("term", tags="b"), + script_score={"script": "_score * -100"}, + ) + + s = s.query(positive_query) + s = s.extra( + rescore={"window_size": 100, "query": {"rescore_query": negative_query}} + ) + assert s.to_dict() 
== { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 100, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "b"}}, + "functions": [{"script_score": {"script": "_score * -100"}}], + } + } + }, + }, + } + + assert s.to_dict( + rescore={"window_size": 10, "query": {"rescore_query": positive_query}} + ) == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 10, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + } + }, + }, + } + + +@pytest.mark.asyncio +async def test_empty_search() -> None: + s = AsyncEmptySearch(index="index-name") + s = s.query("match", lang="java") + s.aggs.bucket("versions", "terms", field="version") + + assert await s.count() == 0 + assert [hit async for hit in s] == [] + assert [hit async for hit in s.scan()] == [] + await s.delete() # should not error + + +def test_suggest_completion() -> None: + s = AsyncSearch() + s = s.suggest("my_suggestion", "pyhton", completion={"field": "title"}) + + assert { + "suggest": { + "my_suggestion": {"completion": {"field": "title"}, "prefix": "pyhton"} + } + } == s.to_dict() + + +def test_suggest_regex_query() -> None: + s = AsyncSearch() + s = s.suggest("my_suggestion", regex="py[thon|py]", completion={"field": "title"}) + + assert { + "suggest": { + "my_suggestion": {"completion": {"field": "title"}, "regex": "py[thon|py]"} + } + } == s.to_dict() + + +def test_suggest_must_pass_text_or_regex() -> None: + s = AsyncSearch() + with raises(ValueError): + s.suggest("my_suggestion") + + +def test_suggest_can_only_pass_text_or_regex() -> None: + s = AsyncSearch() + with raises(ValueError): + s.suggest("my_suggestion", text="python", regex="py[hton|py]") + + +def test_suggest_regex_must_be_wtih_completion() -> None: + s = AsyncSearch() + with raises(ValueError): + s.suggest("my_suggestion", regex="py[thon|py]") diff --git a/test_elasticsearch/test_dsl/_async/test_update_by_query.py b/test_elasticsearch/test_dsl/_async/test_update_by_query.py new file mode 100644 index 000000000..9253623dc --- /dev/null +++ b/test_elasticsearch/test_dsl/_async/test_update_by_query.py @@ -0,0 +1,180 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
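+
+# The tests below mirror the AsyncSearch tests for AsyncUpdateByQuery: query
+# building, scripts, and round-tripping through from_dict()/to_dict().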
+ +from copy import deepcopy +from typing import Any + +import pytest + +from elasticsearch.dsl import AsyncUpdateByQuery, Q +from elasticsearch.dsl.response import UpdateByQueryResponse +from elasticsearch.dsl.search_base import SearchBase + + +def test_ubq_starts_with_no_query() -> None: + ubq = AsyncUpdateByQuery() + + assert ubq.query._proxied is None + + +def test_ubq_to_dict() -> None: + ubq = AsyncUpdateByQuery() + assert {} == ubq.to_dict() + + ubq = ubq.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == ubq.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == ubq.to_dict(size=10) + + ubq = AsyncUpdateByQuery(extra={"size": 5}) + assert {"size": 5} == ubq.to_dict() + + ubq = AsyncUpdateByQuery(extra={"extra_q": Q("term", category="conference")}) + assert {"extra_q": {"term": {"category": "conference"}}} == ubq.to_dict() + + +def test_complex_example() -> None: + ubq = AsyncUpdateByQuery() + ubq = ( + ubq.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + ) + + ubq.query.minimum_should_match = 2 + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } == ubq.to_dict() + + +def test_exclude() -> None: + ubq = AsyncUpdateByQuery() + ubq = ubq.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == ubq.to_dict() + + +def test_reverse() -> None: + d = { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [ + { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + } + ], + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } + + d2 = deepcopy(d) + + ubq = AsyncUpdateByQuery.from_dict(d) + + assert d == d2 + assert d == ubq.to_dict() + + +def test_from_dict_doesnt_need_query() -> None: + ubq = AsyncUpdateByQuery.from_dict({"script": {"source": "test"}}) + + assert {"script": {"source": "test"}} == ubq.to_dict() + + +@pytest.mark.asyncio +async def test_params_being_passed_to_search(async_mock_client: Any) -> None: + ubq = AsyncUpdateByQuery(using="mock", index="i") + ubq = ubq.params(routing="42") + await ubq.execute() + + async_mock_client.update_by_query.assert_called_once_with(index=["i"], routing="42") + + +def test_overwrite_script() -> None: + ubq = AsyncUpdateByQuery() + ubq = ubq.script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + assert { + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + } + } == ubq.to_dict() + ubq = ubq.script(source="ctx._source.likes++") + assert {"script": {"source": "ctx._source.likes++"}} == ubq.to_dict() + + +def test_update_by_query_response_success() -> None: + ubqr = UpdateByQueryResponse(SearchBase(), {"timed_out": False, "failures": []}) + 
assert ubqr.success() + + ubqr = UpdateByQueryResponse(SearchBase(), {"timed_out": True, "failures": []}) + assert not ubqr.success() + + ubqr = UpdateByQueryResponse(SearchBase(), {"timed_out": False, "failures": [{}]}) + assert not ubqr.success() diff --git a/test_elasticsearch/test_dsl/_sync/__init__.py b/test_elasticsearch/test_dsl/_sync/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/_sync/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/_sync/test_document.py b/test_elasticsearch/test_dsl/_sync/test_document.py new file mode 100644 index 000000000..05ad9d623 --- /dev/null +++ b/test_elasticsearch/test_dsl/_sync/test_document.py @@ -0,0 +1,883 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# this file creates several documents using bad or no types because +# these are still supported and should be kept functional in spite +# of not having appropriate type hints. For that reason the comment +# below disables many mypy checks that fails as a result of this. 
+# mypy: disable-error-code="assignment, index, arg-type, call-arg, operator, comparison-overlap, attr-defined" + +import codecs +import ipaddress +import pickle +import sys +from datetime import datetime +from hashlib import md5 +from typing import Any, ClassVar, Dict, List, Optional + +import pytest +from pytest import raises + +from elasticsearch.dsl import ( + Document, + Index, + InnerDoc, + M, + Mapping, + MetaField, + Range, + analyzer, + field, + mapped_field, + utils, +) +from elasticsearch.dsl.document_base import InstrumentedField +from elasticsearch.dsl.exceptions import IllegalOperation, ValidationException + + +class MyInner(InnerDoc): + old_field = field.Text() + + +class MyDoc(Document): + title = field.Keyword() + name = field.Text() + created_at = field.Date() + inner = field.Object(MyInner) + + +class MySubDoc(MyDoc): + name = field.Keyword() + + class Index: + name = "default-index" + + +class MyDoc2(Document): + extra = field.Long() + + +class MyMultiSubDoc(MyDoc2, MySubDoc): + pass + + +class Comment(InnerDoc): + title = field.Text() + tags = field.Keyword(multi=True) + + +class DocWithNested(Document): + comments = field.Nested(Comment) + + class Index: + name = "test-doc-with-nested" + + +class SimpleCommit(Document): + files = field.Text(multi=True) + + class Index: + name = "test-git" + + +class Secret(str): + pass + + +class SecretField(field.CustomField): + builtin_type = "text" + + def _serialize(self, data: Any) -> Any: + return codecs.encode(data, "rot_13") + + def _deserialize(self, data: Any) -> Any: + if isinstance(data, Secret): + return data + return Secret(codecs.decode(data, "rot_13")) + + +class SecretDoc(Document): + title = SecretField(index="no") + + class Index: + name = "test-secret-doc" + + +class NestedSecret(Document): + secrets = field.Nested(SecretDoc) + + class Index: + name = "test-nested-secret" + + +class OptionalObjectWithRequiredField(Document): + comments = field.Nested(properties={"title": field.Keyword(required=True)}) + + class Index: + name = "test-required" + + +class Host(Document): + ip = field.Ip() + + class Index: + name = "test-host" + + +def test_range_serializes_properly() -> None: + class D(Document): + lr: Range[int] = field.LongRange() + + d = D(lr=Range(lt=42)) + assert 40 in d.lr + assert 47 not in d.lr + assert {"lr": {"lt": 42}} == d.to_dict() + + d = D(lr={"lt": 42}) + assert {"lr": {"lt": 42}} == d.to_dict() + + +def test_range_deserializes_properly() -> None: + class D(InnerDoc): + lr = field.LongRange() + + d = D.from_es({"lr": {"lt": 42}}, True) + assert isinstance(d.lr, Range) + assert 40 in d.lr + assert 47 not in d.lr + + +def test_resolve_nested() -> None: + nested, field = NestedSecret._index.resolve_nested("secrets.title") + assert nested == ["secrets"] + assert field is NestedSecret._doc_type.mapping["secrets"]["title"] + + +def test_conflicting_mapping_raises_error_in_index_to_dict() -> None: + class A(Document): + name = field.Text() + + class B(Document): + name = field.Keyword() + + i = Index("i") + i.document(A) + i.document(B) + + with raises(ValueError): + i.to_dict() + + +def test_ip_address_serializes_properly() -> None: + host = Host(ip=ipaddress.IPv4Address("10.0.0.1")) + + assert {"ip": "10.0.0.1"} == host.to_dict() + + +def test_matches_uses_index() -> None: + assert SimpleCommit._matches({"_index": "test-git"}) + assert not SimpleCommit._matches({"_index": "not-test-git"}) + + +def test_matches_with_no_name_always_matches() -> None: + class D(Document): + pass + + assert D._matches({}) + 
assert D._matches({"_index": "whatever"}) + + +def test_matches_accepts_wildcards() -> None: + class MyDoc(Document): + class Index: + name = "my-*" + + assert MyDoc._matches({"_index": "my-index"}) + assert not MyDoc._matches({"_index": "not-my-index"}) + + +def test_assigning_attrlist_to_field() -> None: + sc = SimpleCommit() + l = ["README", "README.rst"] + sc.files = utils.AttrList(l) + + assert sc.to_dict()["files"] is l + + +def test_optional_inner_objects_are_not_validated_if_missing() -> None: + d = OptionalObjectWithRequiredField() + + d.full_clean() + + +def test_custom_field() -> None: + s = SecretDoc(title=Secret("Hello")) + + assert {"title": "Uryyb"} == s.to_dict() + assert s.title == "Hello" + + s = SecretDoc.from_es({"_source": {"title": "Uryyb"}}) + assert s.title == "Hello" + assert isinstance(s.title, Secret) + + +def test_custom_field_mapping() -> None: + assert { + "properties": {"title": {"index": "no", "type": "text"}} + } == SecretDoc._doc_type.mapping.to_dict() + + +def test_custom_field_in_nested() -> None: + s = NestedSecret() + s.secrets.append(SecretDoc(title=Secret("Hello"))) + + assert {"secrets": [{"title": "Uryyb"}]} == s.to_dict() + assert s.secrets[0].title == "Hello" + + +def test_multi_works_after_doc_has_been_saved() -> None: + c = SimpleCommit() + c.full_clean() + c.files.append("setup.py") + + assert c.to_dict() == {"files": ["setup.py"]} + + +def test_multi_works_in_nested_after_doc_has_been_serialized() -> None: + # Issue #359 + c = DocWithNested(comments=[Comment(title="First!")]) + + assert [] == c.comments[0].tags + assert {"comments": [{"title": "First!"}]} == c.to_dict() + assert [] == c.comments[0].tags + + +def test_null_value_for_object() -> None: + d = MyDoc(inner=None) + + assert d.inner is None + + +def test_inherited_doc_types_can_override_index() -> None: + class MyDocDifferentIndex(MySubDoc): + class Index: + name = "not-default-index" + settings = {"number_of_replicas": 0} + aliases: Dict[str, Any] = {"a": {}} + analyzers = [analyzer("my_analizer", tokenizer="keyword")] + + assert MyDocDifferentIndex._index._name == "not-default-index" + assert MyDocDifferentIndex()._get_index() == "not-default-index" + assert MyDocDifferentIndex._index.to_dict() == { + "aliases": {"a": {}}, + "mappings": { + "properties": { + "created_at": {"type": "date"}, + "inner": { + "type": "object", + "properties": {"old_field": {"type": "text"}}, + }, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + } + }, + "settings": { + "analysis": { + "analyzer": {"my_analizer": {"tokenizer": "keyword", "type": "custom"}} + }, + "number_of_replicas": 0, + }, + } + + +def test_to_dict_with_meta() -> None: + d = MySubDoc(title="hello") + d.meta.routing = "some-parent" + + assert { + "_index": "default-index", + "_routing": "some-parent", + "_source": {"title": "hello"}, + } == d.to_dict(True) + + +def test_to_dict_with_meta_includes_custom_index() -> None: + d = MySubDoc(title="hello") + d.meta.index = "other-index" + + assert {"_index": "other-index", "_source": {"title": "hello"}} == d.to_dict(True) + + +def test_to_dict_without_skip_empty_will_include_empty_fields() -> None: + d = MySubDoc(tags=[], title=None, inner={}) + + assert {} == d.to_dict() + assert {"tags": [], "title": None, "inner": {}} == d.to_dict(skip_empty=False) + + +def test_attribute_can_be_removed() -> None: + d = MyDoc(title="hello") + + del d.title + assert "title" not in d._d_ + + +def test_doc_type_can_be_correctly_pickled() -> None: + d = DocWithNested( + title="Hello World!", 
comments=[Comment(title="hellp")], meta={"id": 42} + ) + s = pickle.dumps(d) + + d2 = pickle.loads(s) + + assert d2 == d + assert 42 == d2.meta.id + assert "Hello World!" == d2.title + assert [{"title": "hellp"}] == d2.comments + assert isinstance(d2.comments[0], Comment) + + +def test_meta_is_accessible_even_on_empty_doc() -> None: + d = MyDoc() + d.meta + + d = MyDoc(title="aaa") + d.meta + + +def test_meta_field_mapping() -> None: + class User(Document): + username = field.Text() + + class Meta: + all = MetaField(enabled=False) + _index = MetaField(enabled=True) + dynamic = MetaField("strict") + dynamic_templates = MetaField([42]) + + assert { + "properties": {"username": {"type": "text"}}, + "_all": {"enabled": False}, + "_index": {"enabled": True}, + "dynamic": "strict", + "dynamic_templates": [42], + } == User._doc_type.mapping.to_dict() + + +def test_multi_value_fields() -> None: + class Blog(Document): + tags = field.Keyword(multi=True) + + b = Blog() + assert [] == b.tags + b.tags.append("search") + b.tags.append("python") + assert ["search", "python"] == b.tags + + +def test_docs_with_properties() -> None: + class User(Document): + pwd_hash: str = field.Text() + + def check_password(self, pwd: bytes) -> bool: + return md5(pwd).hexdigest() == self.pwd_hash + + @property + def password(self) -> None: + raise AttributeError("readonly") + + @password.setter + def password(self, pwd: bytes) -> None: + self.pwd_hash = md5(pwd).hexdigest() + + u = User(pwd_hash=md5(b"secret").hexdigest()) + assert u.check_password(b"secret") + assert not u.check_password(b"not-secret") + + u.password = b"not-secret" + assert "password" not in u._d_ + assert not u.check_password(b"secret") + assert u.check_password(b"not-secret") + + with raises(AttributeError): + u.password + + +def test_nested_can_be_assigned_to() -> None: + d1 = DocWithNested(comments=[Comment(title="First!")]) + d2 = DocWithNested() + + d2.comments = d1.comments + assert isinstance(d1.comments[0], Comment) + assert d2.comments == [{"title": "First!"}] + assert {"comments": [{"title": "First!"}]} == d2.to_dict() + assert isinstance(d2.comments[0], Comment) + + +def test_nested_can_be_none() -> None: + d = DocWithNested(comments=None, title="Hello World!") + + assert {"title": "Hello World!"} == d.to_dict() + + +def test_nested_defaults_to_list_and_can_be_updated() -> None: + md = DocWithNested() + + assert [] == md.comments + + md.comments.append({"title": "hello World!"}) + assert {"comments": [{"title": "hello World!"}]} == md.to_dict() + + +def test_to_dict_is_recursive_and_can_cope_with_multi_values() -> None: + md = MyDoc(name=["a", "b", "c"]) + md.inner = [MyInner(old_field="of1"), MyInner(old_field="of2")] + + assert isinstance(md.inner[0], MyInner) + + assert { + "name": ["a", "b", "c"], + "inner": [{"old_field": "of1"}, {"old_field": "of2"}], + } == md.to_dict() + + +def test_to_dict_ignores_empty_collections() -> None: + md = MySubDoc(name="", address={}, count=0, valid=False, tags=[]) + + assert {"name": "", "count": 0, "valid": False} == md.to_dict() + + +def test_declarative_mapping_definition() -> None: + assert issubclass(MyDoc, Document) + assert hasattr(MyDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "text"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == MyDoc._doc_type.mapping.to_dict() + + +def test_you_can_supply_own_mapping_instance() -> None: + class MyD(Document): + title = 
field.Text() + + class Meta: + mapping = Mapping() + mapping.meta("_all", enabled=False) + + assert { + "_all": {"enabled": False}, + "properties": {"title": {"type": "text"}}, + } == MyD._doc_type.mapping.to_dict() + + +def test_document_can_be_created_dynamically() -> None: + n = datetime.now() + md = MyDoc(title="hello") + md.name = "My Fancy Document!" + md.created_at = n + + inner = md.inner + # consistent returns + assert inner is md.inner + inner.old_field = "Already defined." + + md.inner.new_field = ["undefined", "field"] + + assert { + "title": "hello", + "name": "My Fancy Document!", + "created_at": n, + "inner": {"old_field": "Already defined.", "new_field": ["undefined", "field"]}, + } == md.to_dict() + + +def test_invalid_date_will_raise_exception() -> None: + md = MyDoc() + md.created_at = "not-a-date" + with raises(ValidationException): + md.full_clean() + + +def test_document_inheritance() -> None: + assert issubclass(MySubDoc, MyDoc) + assert issubclass(MySubDoc, Document) + assert hasattr(MySubDoc, "_doc_type") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + } + } == MySubDoc._doc_type.mapping.to_dict() + + +def test_child_class_can_override_parent() -> None: + class A(Document): + o = field.Object(dynamic=False, properties={"a": field.Text()}) + + class B(A): + o = field.Object(dynamic="strict", properties={"b": field.Text()}) + + assert { + "properties": { + "o": { + "dynamic": "strict", + "properties": {"a": {"type": "text"}, "b": {"type": "text"}}, + "type": "object", + } + } + } == B._doc_type.mapping.to_dict() + + +def test_meta_fields_are_stored_in_meta_and_ignored_by_to_dict() -> None: + md = MySubDoc(meta={"id": 42}, name="My First doc!") + + md.meta.index = "my-index" + assert md.meta.index == "my-index" + assert md.meta.id == 42 + assert {"name": "My First doc!"} == md.to_dict() + assert {"id": 42, "index": "my-index"} == md.meta.to_dict() + + +def test_index_inheritance() -> None: + assert issubclass(MyMultiSubDoc, MySubDoc) + assert issubclass(MyMultiSubDoc, MyDoc2) + assert issubclass(MyMultiSubDoc, Document) + assert hasattr(MyMultiSubDoc, "_doc_type") + assert hasattr(MyMultiSubDoc, "_index") + assert { + "properties": { + "created_at": {"type": "date"}, + "name": {"type": "keyword"}, + "title": {"type": "keyword"}, + "inner": {"type": "object", "properties": {"old_field": {"type": "text"}}}, + "extra": {"type": "long"}, + } + } == MyMultiSubDoc._doc_type.mapping.to_dict() + + +def test_meta_fields_can_be_set_directly_in_init() -> None: + p = object() + md = MyDoc(_id=p, title="Hello World!") + + assert md.meta.id is p + + +@pytest.mark.sync +def test_save_no_index(mock_client: Any) -> None: + md = MyDoc() + with raises(ValidationException): + md.save(using="mock") + + +@pytest.mark.sync +def test_delete_no_index(mock_client: Any) -> None: + md = MyDoc() + with raises(ValidationException): + md.delete(using="mock") + + +@pytest.mark.sync +def test_update_no_fields() -> None: + md = MyDoc() + with raises(IllegalOperation): + md.update() + + +def test_search_with_custom_alias_and_index() -> None: + search_object = MyDoc.search( + using="staging", index=["custom_index1", "custom_index2"] + ) + + assert search_object._using == "staging" + assert search_object._index == ["custom_index1", "custom_index2"] + + +def test_from_es_respects_underscored_non_meta_fields() -> None: + doc = { + "_index": 
"test-index", + "_id": "elasticsearch", + "_score": 12.0, + "fields": {"hello": "world", "_routing": "es", "_tags": ["search"]}, + "_source": { + "city": "Amsterdam", + "name": "Elasticsearch", + "_tagline": "You know, for search", + }, + } + + class Company(Document): + class Index: + name = "test-company" + + c = Company.from_es(doc) + + assert c.meta.fields._tags == ["search"] + assert c.meta.fields._routing == "es" + assert c._tagline == "You know, for search" + + +def test_nested_and_object_inner_doc() -> None: + class MySubDocWithNested(MyDoc): + nested_inner = field.Nested(MyInner) + + props = MySubDocWithNested._doc_type.mapping.to_dict()["properties"] + assert props == { + "created_at": {"type": "date"}, + "inner": {"properties": {"old_field": {"type": "text"}}, "type": "object"}, + "name": {"type": "text"}, + "nested_inner": { + "properties": {"old_field": {"type": "text"}}, + "type": "nested", + }, + "title": {"type": "keyword"}, + } + + +def test_doc_with_type_hints() -> None: + class TypedInnerDoc(InnerDoc): + st: M[str] + dt: M[Optional[datetime]] + li: M[List[int]] + + class TypedDoc(Document): + st: str + dt: Optional[datetime] + li: List[int] + ob: TypedInnerDoc + ns: List[TypedInnerDoc] + ip: Optional[str] = field.Ip() + k1: str = field.Keyword(required=True) + k2: M[str] = field.Keyword() + k3: str = mapped_field(field.Keyword(), default="foo") + k4: M[Optional[str]] = mapped_field(field.Keyword()) # type: ignore[misc] + s1: Secret = SecretField() + s2: M[Secret] = SecretField() + s3: Secret = mapped_field(SecretField()) # type: ignore[misc] + s4: M[Optional[Secret]] = mapped_field( + SecretField(), default_factory=lambda: "foo" + ) + i1: ClassVar + i2: ClassVar[int] + + props = TypedDoc._doc_type.mapping.to_dict()["properties"] + assert props == { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + "ob": { + "type": "object", + "properties": { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + }, + }, + "ns": { + "type": "nested", + "properties": { + "st": {"type": "text"}, + "dt": {"type": "date"}, + "li": {"type": "integer"}, + }, + }, + "ip": {"type": "ip"}, + "k1": {"type": "keyword"}, + "k2": {"type": "keyword"}, + "k3": {"type": "keyword"}, + "k4": {"type": "keyword"}, + "s1": {"type": "text"}, + "s2": {"type": "text"}, + "s3": {"type": "text"}, + "s4": {"type": "text"}, + } + + TypedDoc.i1 = "foo" + TypedDoc.i2 = 123 + + doc = TypedDoc() + assert doc.k3 == "foo" + assert doc.s4 == "foo" + with raises(ValidationException) as exc_info: + doc.full_clean() + assert set(exc_info.value.args[0].keys()) == { + "st", + "k1", + "k2", + "ob", + "s1", + "s2", + "s3", + } + + assert TypedDoc.i1 == "foo" + assert TypedDoc.i2 == 123 + + doc.st = "s" + doc.li = [1, 2, 3] + doc.k1 = "k1" + doc.k2 = "k2" + doc.ob.st = "s" + doc.ob.li = [1] + doc.s1 = "s1" + doc.s2 = "s2" + doc.s3 = "s3" + doc.full_clean() + + doc.ob = TypedInnerDoc(li=[1]) + with raises(ValidationException) as exc_info: + doc.full_clean() + assert set(exc_info.value.args[0].keys()) == {"ob"} + assert set(exc_info.value.args[0]["ob"][0].args[0].keys()) == {"st"} + + doc.ob.st = "s" + doc.ns.append(TypedInnerDoc(li=[1, 2])) + with raises(ValidationException) as exc_info: + doc.full_clean() + + doc.ns[0].st = "s" + doc.full_clean() + + doc.ip = "1.2.3.4" + n = datetime.now() + doc.dt = n + assert doc.to_dict() == { + "st": "s", + "li": [1, 2, 3], + "dt": n, + "ob": { + "st": "s", + "li": [1], + }, + "ns": [ + { + "st": "s", + "li": [1, 2], + } + ], + 
"ip": "1.2.3.4", + "k1": "k1", + "k2": "k2", + "k3": "foo", + "s1": "s1", + "s2": "s2", + "s3": "s3", + "s4": "foo", + } + + s = TypedDoc.search().sort(TypedDoc.st, -TypedDoc.dt, +TypedDoc.ob.st) + s.aggs.bucket("terms_agg", "terms", field=TypedDoc.k1) + assert s.to_dict() == { + "aggs": {"terms_agg": {"terms": {"field": "k1"}}}, + "sort": ["st", {"dt": {"order": "desc"}}, "ob.st"], + } + + +@pytest.mark.skipif(sys.version_info < (3, 10), reason="requires Python 3.10") +def test_doc_with_pipe_type_hints() -> None: + with pytest.raises(TypeError): + + class BadlyTypedDoc(Document): + s: str + f: str | int | None # type: ignore[syntax] + + class TypedDoc(Document): + s: str + f1: str | None # type: ignore[syntax] + f2: M[int | None] # type: ignore[syntax] + f3: M[datetime | None] # type: ignore[syntax] + + props = TypedDoc._doc_type.mapping.to_dict()["properties"] + assert props == { + "s": {"type": "text"}, + "f1": {"type": "text"}, + "f2": {"type": "integer"}, + "f3": {"type": "date"}, + } + + doc = TypedDoc() + with raises(ValidationException) as exc_info: + doc.full_clean() + assert set(exc_info.value.args[0].keys()) == {"s"} + doc.s = "s" + doc.full_clean() + + +def test_instrumented_field() -> None: + class Child(InnerDoc): + st: M[str] + + class Doc(Document): + st: str + ob: Child + ns: List[Child] + + doc = Doc( + st="foo", + ob=Child(st="bar"), + ns=[ + Child(st="baz"), + Child(st="qux"), + ], + ) + + assert type(doc.st) is str + assert doc.st == "foo" + + assert type(doc.ob) is Child + assert doc.ob.st == "bar" + + assert type(doc.ns) is utils.AttrList + assert doc.ns[0].st == "baz" + assert doc.ns[1].st == "qux" + assert type(doc.ns[0]) is Child + assert type(doc.ns[1]) is Child + + assert type(Doc.st) is InstrumentedField + assert str(Doc.st) == "st" + assert +Doc.st == "st" + assert -Doc.st == "-st" + assert Doc.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.st.something + + assert type(Doc.ob) is InstrumentedField + assert str(Doc.ob) == "ob" + assert str(Doc.ob.st) == "ob.st" + assert +Doc.ob.st == "ob.st" + assert -Doc.ob.st == "-ob.st" + assert Doc.ob.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.ob.something + with raises(AttributeError): + Doc.ob.st.something + + assert type(Doc.ns) is InstrumentedField + assert str(Doc.ns) == "ns" + assert str(Doc.ns.st) == "ns.st" + assert +Doc.ns.st == "ns.st" + assert -Doc.ns.st == "-ns.st" + assert Doc.ns.st.to_dict() == {"type": "text"} + with raises(AttributeError): + Doc.ns.something + with raises(AttributeError): + Doc.ns.st.something diff --git a/test_elasticsearch/test_dsl/_sync/test_faceted_search.py b/test_elasticsearch/test_dsl/_sync/test_faceted_search.py new file mode 100644 index 000000000..33b17bd1e --- /dev/null +++ b/test_elasticsearch/test_dsl/_sync/test_faceted_search.py @@ -0,0 +1,201 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +import pytest + +from elasticsearch.dsl.faceted_search import ( + DateHistogramFacet, + FacetedSearch, + TermsFacet, +) + + +class BlogSearch(FacetedSearch): + doc_types = ["user", "post"] + fields = [ + "title^5", + "body", + ] + + facets = { + "category": TermsFacet(field="category.raw"), + "tags": TermsFacet(field="tags"), + } + + +def test_query_is_created_properly() -> None: + bs = BlogSearch("python search") + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +def test_query_is_created_properly_with_sort_tuple() -> None: + bs = BlogSearch("python search", sort=("category", "-title")) + s = bs.build_search() + + assert s._doc_type == ["user", "post"] + assert { + "aggs": { + "_filter_tags": { + "filter": {"match_all": {}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + "sort": ["category", {"title": {"order": "desc"}}], + } == s.to_dict() + + +def test_filter_is_applied_to_search_but_not_relevant_facet() -> None: + bs = BlogSearch("python search", filters={"category": "elastic"}) + s = bs.build_search() + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["elastic"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"match_all": {}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "post_filter": {"terms": {"category.raw": ["elastic"]}}, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": "python search"} + }, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == s.to_dict() + + +def test_filters_are_applied_to_search_ant_relevant_facets() -> None: + bs = BlogSearch( + "python search", filters={"category": "elastic", "tags": ["python", "django"]} + ) + s = bs.build_search() + + d = s.to_dict() + + # we need to test post_filter without relying on order + f = d["post_filter"]["bool"].pop("must") + assert len(f) == 2 + assert {"terms": {"category.raw": ["elastic"]}} in f + assert {"terms": {"tags": ["python", "django"]}} in f + + assert { + "aggs": { + "_filter_tags": { + "filter": {"terms": {"category.raw": ["elastic"]}}, + "aggs": {"tags": {"terms": {"field": "tags"}}}, + }, + "_filter_category": { + "filter": {"terms": {"tags": ["python", "django"]}}, + "aggs": {"category": {"terms": {"field": "category.raw"}}}, + }, + }, + "query": { + "multi_match": {"fields": ["title^5", "body"], "query": "python search"} + }, + "post_filter": {"bool": {}}, + "highlight": {"fields": {"body": {}, "title": {}}}, + } == d + + +def test_date_histogram_facet_with_1970_01_01_date() -> None: + dhf = DateHistogramFacet() + assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0) # type: ignore[arg-type] + assert 
dhf.get_value({"key": 0}) == datetime(1970, 1, 1, 0, 0) # type: ignore[arg-type] + + +@pytest.mark.parametrize( + ["interval_type", "interval"], + [ + ("interval", "year"), + ("calendar_interval", "year"), + ("interval", "month"), + ("calendar_interval", "month"), + ("interval", "week"), + ("calendar_interval", "week"), + ("interval", "day"), + ("calendar_interval", "day"), + ("fixed_interval", "day"), + ("interval", "hour"), + ("fixed_interval", "hour"), + ("interval", "1Y"), + ("calendar_interval", "1Y"), + ("interval", "1M"), + ("calendar_interval", "1M"), + ("interval", "1w"), + ("calendar_interval", "1w"), + ("interval", "1d"), + ("calendar_interval", "1d"), + ("fixed_interval", "1d"), + ("interval", "1h"), + ("fixed_interval", "1h"), + ], +) +def test_date_histogram_interval_types(interval_type: str, interval: str) -> None: + dhf = DateHistogramFacet(field="@timestamp", **{interval_type: interval}) + assert dhf.get_aggregation().to_dict() == { + "date_histogram": { + "field": "@timestamp", + interval_type: interval, + "min_doc_count": 0, + } + } + dhf.get_value_filter(datetime.now()) + + +def test_date_histogram_no_interval_keyerror() -> None: + dhf = DateHistogramFacet(field="@timestamp") + with pytest.raises(KeyError) as e: + dhf.get_value_filter(datetime.now()) + assert str(e.value) == "'interval'" + + +def test_params_added_to_search() -> None: + bs = BlogSearch("python search") + assert bs._s._params == {} + bs.params(routing="42") + assert bs._s._params == {"routing": "42"} diff --git a/test_elasticsearch/test_dsl/_sync/test_index.py b/test_elasticsearch/test_dsl/_sync/test_index.py new file mode 100644 index 000000000..c6d1b7904 --- /dev/null +++ b/test_elasticsearch/test_dsl/_sync/test_index.py @@ -0,0 +1,190 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import string +from random import choice +from typing import Any, Dict + +import pytest +from pytest import raises + +from elasticsearch.dsl import Date, Document, Index, IndexTemplate, Text, analyzer + + +class Post(Document): + title = Text() + published_from = Date() + + +def test_multiple_doc_types_will_combine_mappings() -> None: + class User(Document): + username = Text() + + i = Index("i") + i.document(Post) + i.document(User) + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "username": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +def test_search_is_limited_to_index_name() -> None: + i = Index("my-index") + s = i.search() + + assert s._index == ["my-index"] + + +def test_cloned_index_has_copied_settings_and_using() -> None: + client = object() + i = Index("my-index", using=client) # type: ignore[arg-type] + i.settings(number_of_shards=1) + + i2 = i.clone("my-other-index") + + assert "my-other-index" == i2._name + assert client is i2._using + assert i._settings == i2._settings + assert i._settings is not i2._settings + + +def test_cloned_index_has_analysis_attribute() -> None: + """ + Regression test for Issue #582 in which `AsyncIndex.clone()` was not copying + over the `_analysis` attribute. + """ + client = object() + i = Index("my-index", using=client) # type: ignore[arg-type] + + random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100)) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + i.analyzer(random_analyzer) + + i2 = i.clone("my-clone-index") + + assert i.to_dict()["settings"]["analysis"] == i2.to_dict()["settings"]["analysis"] + + +def test_settings_are_saved() -> None: + i = Index("i") + i.settings(number_of_replicas=0) + i.settings(number_of_shards=1) + + assert {"settings": {"number_of_shards": 1, "number_of_replicas": 0}} == i.to_dict() + + +def test_registered_doc_type_included_in_to_dict() -> None: + i = Index("i", using="alias") + i.document(Post) + + assert { + "mappings": { + "properties": { + "title": {"type": "text"}, + "published_from": {"type": "date"}, + } + } + } == i.to_dict() + + +def test_registered_doc_type_included_in_search() -> None: + i = Index("i", using="alias") + i.document(Post) + + s = i.search() + + assert s._doc_type == [Post] + + +def test_aliases_add_to_object() -> None: + random_alias = "".join(choice(string.ascii_letters) for _ in range(100)) + alias_dict: Dict[str, Any] = {random_alias: {}} + + index = Index("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == alias_dict + + +def test_aliases_returned_from_to_dict() -> None: + random_alias = "".join(choice(string.ascii_letters) for _ in range(100)) + alias_dict: Dict[str, Any] = {random_alias: {}} + + index = Index("i", using="alias") + index.aliases(**alias_dict) + + assert index._aliases == index.to_dict()["aliases"] == alias_dict + + +def test_analyzers_added_to_object() -> None: + random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100)) + random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + + index = Index("i", using="alias") + index.analyzer(random_analyzer) + + assert index._analysis["analyzer"][random_analyzer_name] == { + "filter": ["standard"], + "type": "custom", + "tokenizer": "standard", + } + + +def test_analyzers_returned_from_to_dict() -> None: + random_analyzer_name = "".join(choice(string.ascii_letters) for _ in range(100)) + 
random_analyzer = analyzer( + random_analyzer_name, tokenizer="standard", filter="standard" + ) + index = Index("i", using="alias") + index.analyzer(random_analyzer) + + assert index.to_dict()["settings"]["analysis"]["analyzer"][ + random_analyzer_name + ] == {"filter": ["standard"], "type": "custom", "tokenizer": "standard"} + + +def test_conflicting_analyzer_raises_error() -> None: + i = Index("i") + i.analyzer("my_analyzer", tokenizer="whitespace", filter=["lowercase", "stop"]) + + with raises(ValueError): + i.analyzer("my_analyzer", tokenizer="keyword", filter=["lowercase", "stop"]) + + +def test_index_template_can_have_order() -> None: + i = Index("i-*") + it = i.as_template("i", order=2) + + assert {"index_patterns": ["i-*"], "order": 2} == it.to_dict() + + +@pytest.mark.sync +def test_index_template_save_result(mock_client: Any) -> None: + it = IndexTemplate("test-template", "test-*") + + assert it.save(using="mock") == mock_client.indices.put_template() diff --git a/test_elasticsearch/test_dsl/_sync/test_mapping.py b/test_elasticsearch/test_dsl/_sync/test_mapping.py new file mode 100644 index 000000000..0e63d2e05 --- /dev/null +++ b/test_elasticsearch/test_dsl/_sync/test_mapping.py @@ -0,0 +1,222 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
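+
+# Unit tests for the synchronous Mapping DSL class. A minimal sketch of the
+# API exercised by the tests below:
+#
+#     m = Mapping()
+#     m.field("name", "text").field("tags", "keyword")
+#     m.to_dict()  # {"properties": {"name": {"type": "text"}, "tags": {"type": "keyword"}}}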
+ +import json + +from elasticsearch.dsl import Keyword, Mapping, Nested, Text, analysis + + +def test_mapping_can_has_fields() -> None: + m = Mapping() + m.field("name", "text").field("tags", "keyword") + + assert { + "properties": {"name": {"type": "text"}, "tags": {"type": "keyword"}} + } == m.to_dict() + + +def test_mapping_update_is_recursive() -> None: + m1 = Mapping() + m1.field("title", "text") + m1.field("author", "object") + m1.field("author", "object", properties={"name": {"type": "text"}}) + m1.meta("_all", enabled=False) + m1.meta("dynamic", False) + + m2 = Mapping() + m2.field("published_from", "date") + m2.field("author", "object", properties={"email": {"type": "text"}}) + m2.field("title", "text") + m2.field("lang", "keyword") + m2.meta("_analyzer", path="lang") + + m1.update(m2, update_only=True) + + assert { + "_all": {"enabled": False}, + "_analyzer": {"path": "lang"}, + "dynamic": False, + "properties": { + "published_from": {"type": "date"}, + "title": {"type": "text"}, + "lang": {"type": "keyword"}, + "author": { + "type": "object", + "properties": {"name": {"type": "text"}, "email": {"type": "text"}}, + }, + }, + } == m1.to_dict() + + +def test_properties_can_iterate_over_all_the_fields() -> None: + m = Mapping() + m.field("f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")}) + m.field("f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")})) + + assert {"f1", "f2", "f3", "f4"} == { + f.test_attr for f in m.properties._collect_fields() + } + + +def test_mapping_can_collect_all_analyzers_and_normalizers() -> None: + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer("english") + a3 = analysis.analyzer("unknown_custom") + a4 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + a5 = analysis.analyzer("my_analyzer3", tokenizer="keyword") + n1 = analysis.normalizer("my_normalizer1", filter=["lowercase"]) + n2 = analysis.normalizer( + "my_normalizer2", + filter=[ + "my_filter1", + "my_filter2", + analysis.token_filter("my_filter3", "stop", stopwords=["e", "f"]), + ], + ) + n3 = analysis.normalizer("unknown_custom") + + m = Mapping() + m.field( + "title", + "text", + analyzer=a1, + fields={"english": Text(analyzer=a2), "unknown": Keyword(search_analyzer=a3)}, + ) + m.field("comments", Nested(properties={"author": Text(analyzer=a4)})) + m.field("normalized_title", "keyword", normalizer=n1) + m.field("normalized_comment", "keyword", normalizer=n2) + m.field("unknown", "keyword", normalizer=n3) + m.meta("_all", analyzer=a5) + + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + "my_analyzer3": {"tokenizer": "keyword", "type": "custom"}, + }, + "normalizer": { + "my_normalizer1": {"filter": ["lowercase"], "type": "custom"}, + "my_normalizer2": { + "filter": ["my_filter1", "my_filter2", "my_filter3"], + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, + "my_filter3": {"stopwords": ["e", "f"], "type": "stop"}, + }, + "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, 
"type": "nGram"}}, + } == m._collect_analysis() + + assert json.loads(json.dumps(m.to_dict())) == m.to_dict() + + +def test_mapping_can_collect_multiple_analyzers() -> None: + a1 = analysis.analyzer( + "my_analyzer1", + tokenizer="keyword", + filter=[ + "lowercase", + analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]), + ], + ) + a2 = analysis.analyzer( + "my_analyzer2", + tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), + filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], + ) + m = Mapping() + m.field("title", "text", analyzer=a1, search_analyzer=a2) + m.field( + "text", + "text", + analyzer=a1, + fields={ + "english": Text(analyzer=a1), + "unknown": Keyword(analyzer=a1, search_analyzer=a2), + }, + ) + assert { + "analyzer": { + "my_analyzer1": { + "filter": ["lowercase", "my_filter1"], + "tokenizer": "keyword", + "type": "custom", + }, + "my_analyzer2": { + "filter": ["my_filter2"], + "tokenizer": "trigram", + "type": "custom", + }, + }, + "filter": { + "my_filter1": {"stopwords": ["a", "b"], "type": "stop"}, + "my_filter2": {"stopwords": ["c", "d"], "type": "stop"}, + }, + "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}}, + } == m._collect_analysis() + + +def test_even_non_custom_analyzers_can_have_params() -> None: + a1 = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+") + m = Mapping() + m.field("title", "text", analyzer=a1) + + assert { + "analyzer": {"whitespace": {"type": "pattern", "pattern": r"\\s+"}} + } == m._collect_analysis() + + +def test_resolve_field_can_resolve_multifields() -> None: + m = Mapping() + m.field("title", "text", fields={"keyword": Keyword()}) + + assert isinstance(m.resolve_field("title.keyword"), Keyword) + + +def test_resolve_nested() -> None: + m = Mapping() + m.field("n1", "nested", properties={"n2": Nested(properties={"k1": Keyword()})}) + m.field("k2", "keyword") + + nested, field = m.resolve_nested("n1.n2.k1") + assert nested == ["n1", "n1.n2"] + assert isinstance(field, Keyword) + + nested, field = m.resolve_nested("k2") + assert nested == [] + assert isinstance(field, Keyword) diff --git a/test_elasticsearch/test_dsl/_sync/test_search.py b/test_elasticsearch/test_dsl/_sync/test_search.py new file mode 100644 index 000000000..04b0ad53e --- /dev/null +++ b/test_elasticsearch/test_dsl/_sync/test_search.py @@ -0,0 +1,831 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from copy import deepcopy +from typing import Any + +import pytest +from pytest import raises + +from elasticsearch.dsl import Document, EmptySearch, Q, Search, query, types, wrappers +from elasticsearch.dsl.exceptions import IllegalOperation + + +def test_expand__to_dot_is_respected() -> None: + s = Search().query("match", a__b=42, _expand__to_dot=False) + + assert {"query": {"match": {"a__b": 42}}} == s.to_dict() + + +@pytest.mark.sync +def test_execute_uses_cache() -> None: + s = Search() + r = object() + s._response = r # type: ignore[assignment] + + assert r is s.execute() + + +@pytest.mark.sync +def test_cache_can_be_ignored(mock_client: Any) -> None: + s = Search(using="mock") + r = object() + s._response = r # type: ignore[assignment] + s.execute(ignore_cache=True) + + mock_client.search.assert_called_once_with(index=None, body={}) + + +@pytest.mark.sync +def test_iter_iterates_over_hits() -> None: + s = Search() + s._response = [1, 2, 3] # type: ignore[assignment] + + assert [1, 2, 3] == [hit for hit in s] + + +def test_cache_isnt_cloned() -> None: + s = Search() + s._response = object() # type: ignore[assignment] + + assert not hasattr(s._clone(), "_response") + + +def test_search_starts_with_no_query() -> None: + s = Search() + + assert s.query._proxied is None + + +def test_search_query_combines_query() -> None: + s = Search() + + s2 = s.query("match", f=42) + assert s2.query._proxied == query.Match(f=42) + assert s.query._proxied is None + + s3 = s2.query("match", f=43) + assert s2.query._proxied == query.Match(f=42) + assert s3.query._proxied == query.Bool(must=[query.Match(f=42), query.Match(f=43)]) + + +def test_query_can_be_assigned_to() -> None: + s = Search() + + q = Q("match", title="python") + s.query = q # type: ignore + + assert s.query._proxied is q + + +def test_query_can_be_wrapped() -> None: + s = Search().query("match", title="python") + + s.query = Q("function_score", query=s.query, field_value_factor={"field": "rating"}) # type: ignore + + assert { + "query": { + "function_score": { + "functions": [{"field_value_factor": {"field": "rating"}}], + "query": {"match": {"title": "python"}}, + } + } + } == s.to_dict() + + +def test_using() -> None: + o = object() + o2 = object() + s = Search(using=o) + assert s._using is o + s2 = s.using(o2) # type: ignore[arg-type] + assert s._using is o + assert s2._using is o2 + + +def test_methods_are_proxied_to_the_query() -> None: + s = Search().query("match_all") + + assert s.query.to_dict() == {"match_all": {}} + + +def test_query_always_returns_search() -> None: + s = Search() + + assert isinstance(s.query("match", f=42), Search) + + +def test_source_copied_on_clone() -> None: + s = Search().source(False) + assert s._clone()._source == s._source + assert s._clone()._source is False + + s2 = Search().source([]) + assert s2._clone()._source == s2._source + assert s2._source == [] + + s3 = Search().source(["some", "fields"]) + assert s3._clone()._source == s3._source + assert s3._clone()._source == ["some", "fields"] + + +def test_copy_clones() -> None: + from copy import copy + + s1 = Search().source(["some", "fields"]) + s2 = copy(s1) + + assert s1 == s2 + assert s1 is not s2 + + +def test_aggs_allow_two_metric() -> None: + s = Search() + + s.aggs.metric("a", "max", field="a").metric("b", "max", field="b") + + assert s.to_dict() == { + "aggs": {"a": {"max": {"field": "a"}}, "b": {"max": {"field": "b"}}} + } + + +def test_aggs_get_copied_on_change() -> None: + s = Search().query("match_all") + s.aggs.bucket("per_tag", 
"terms", field="f").metric( + "max_score", "max", field="score" + ) + + s2 = s.query("match_all") + s2.aggs.bucket("per_month", "date_histogram", field="date", interval="month") + s3 = s2.query("match_all") + s3.aggs["per_month"].metric("max_score", "max", field="score") + s4 = s3._clone() + s4.aggs.metric("max_score", "max", field="score") + + d: Any = { + "query": {"match_all": {}}, + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + } + + assert d == s.to_dict() + d["aggs"]["per_month"] = {"date_histogram": {"field": "date", "interval": "month"}} + assert d == s2.to_dict() + d["aggs"]["per_month"]["aggs"] = {"max_score": {"max": {"field": "score"}}} + assert d == s3.to_dict() + d["aggs"]["max_score"] = {"max": {"field": "score"}} + assert d == s4.to_dict() + + +def test_search_index() -> None: + s = Search(index="i") + assert s._index == ["i"] + s = s.index("i2") + assert s._index == ["i", "i2"] + s = s.index("i3") + assert s._index == ["i", "i2", "i3"] + s = s.index() + assert s._index is None + s = Search(index=("i", "i2")) + assert s._index == ["i", "i2"] + s = Search(index=["i", "i2"]) + assert s._index == ["i", "i2"] + s = Search() + s = s.index("i", "i2") + assert s._index == ["i", "i2"] + s2 = s.index("i3") + assert s._index == ["i", "i2"] + assert s2._index == ["i", "i2", "i3"] + s = Search() + s = s.index(["i", "i2"], "i3") + assert s._index == ["i", "i2", "i3"] + s2 = s.index("i4") + assert s._index == ["i", "i2", "i3"] + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(["i4"]) + assert s2._index == ["i", "i2", "i3", "i4"] + s2 = s.index(("i4", "i5")) + assert s2._index == ["i", "i2", "i3", "i4", "i5"] + + +def test_doc_type_document_class() -> None: + class MyDocument(Document): + pass + + s = Search(doc_type=MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + s = Search().doc_type(MyDocument) + assert s._doc_type == [MyDocument] + assert s._doc_type_map == {} + + +def test_knn() -> None: + s = Search() + + with raises(TypeError): + s.knn() # type: ignore[call-arg] + with raises(TypeError): + s.knn("field") # type: ignore[call-arg] + with raises(TypeError): + s.knn("field", 5) # type: ignore[call-arg] + with raises(ValueError): + s.knn("field", 5, 100) + with raises(ValueError): + s.knn("field", 5, 100, query_vector=[1, 2, 3], query_vector_builder={}) + + s = s.knn("field", 5, 100, query_vector=[1, 2, 3]) + assert { + "knn": { + "field": "field", + "k": 5, + "num_candidates": 100, + "query_vector": [1, 2, 3], + } + } == s.to_dict() + + s = s.knn( + k=4, + num_candidates=40, + boost=0.8, + field="name", + query_vector_builder={ + "text_embedding": {"model_id": "foo", "model_text": "search text"} + }, + inner_hits={"size": 1}, + ) + assert { + "knn": [ + { + "field": "field", + "k": 5, + "num_candidates": 100, + "query_vector": [1, 2, 3], + }, + { + "field": "name", + "k": 4, + "num_candidates": 40, + "query_vector_builder": { + "text_embedding": {"model_id": "foo", "model_text": "search text"} + }, + "boost": 0.8, + "inner_hits": {"size": 1}, + }, + ] + } == s.to_dict() + + +def test_rank() -> None: + s = Search() + s.rank(rrf=False) + assert {} == s.to_dict() + + s = s.rank(rrf=True) + assert {"rank": {"rrf": {}}} == s.to_dict() + + s = s.rank(rrf={"window_size": 50, "rank_constant": 20}) + assert {"rank": {"rrf": {"window_size": 50, "rank_constant": 20}}} == s.to_dict() + + +def test_sort() -> None: + s = Search() + s = s.sort("fielda", "-fieldb") + + assert 
["fielda", {"fieldb": {"order": "desc"}}] == s._sort + assert {"sort": ["fielda", {"fieldb": {"order": "desc"}}]} == s.to_dict() + + s = s.sort() + assert [] == s._sort + assert Search().to_dict() == s.to_dict() + + +def test_sort_by_score() -> None: + s = Search() + s = s.sort("_score") + assert {"sort": ["_score"]} == s.to_dict() + + s = Search() + with raises(IllegalOperation): + s.sort("-_score") + + +def test_collapse() -> None: + s = Search() + + inner_hits = {"name": "most_recent", "size": 5, "sort": [{"@timestamp": "desc"}]} + s = s.collapse("user.id", inner_hits=inner_hits, max_concurrent_group_searches=4) + + assert { + "field": "user.id", + "inner_hits": { + "name": "most_recent", + "size": 5, + "sort": [{"@timestamp": "desc"}], + }, + "max_concurrent_group_searches": 4, + } == s._collapse + assert { + "collapse": { + "field": "user.id", + "inner_hits": { + "name": "most_recent", + "size": 5, + "sort": [{"@timestamp": "desc"}], + }, + "max_concurrent_group_searches": 4, + } + } == s.to_dict() + + s = s.collapse() + assert {} == s._collapse + assert Search().to_dict() == s.to_dict() + + +def test_slice() -> None: + s = Search() + assert {"from": 3, "size": 7} == s[3:10].to_dict() + assert {"size": 5} == s[:5].to_dict() + assert {"from": 3} == s[3:].to_dict() + assert {"from": 0, "size": 0} == s[0:0].to_dict() + assert {"from": 20, "size": 0} == s[20:0].to_dict() + assert {"from": 10, "size": 5} == s[10:][:5].to_dict() + assert {"from": 10, "size": 0} == s[:5][10:].to_dict() + assert {"size": 10} == s[:10][:40].to_dict() + assert {"size": 10} == s[:40][:10].to_dict() + assert {"size": 40} == s[:40][:80].to_dict() + assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() + assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() + assert {} == s[:].to_dict() + with raises(ValueError): + s[-1:] + with raises(ValueError): + s[4:-1] + with raises(ValueError): + s[-3:-2] + + +def test_index() -> None: + s = Search() + assert {"from": 3, "size": 1} == s[3].to_dict() + assert {"from": 3, "size": 1} == s[3][0].to_dict() + assert {"from": 8, "size": 0} == s[3][5].to_dict() + assert {"from": 4, "size": 1} == s[3:10][1].to_dict() + with raises(ValueError): + s[-3] + + +def test_search_to_dict() -> None: + s = Search() + assert {} == s.to_dict() + + s = s.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == s.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == s.to_dict(size=10) + + s.aggs.bucket("per_tag", "terms", field="f").metric( + "max_score", "max", field="score" + ) + d = { + "aggs": { + "per_tag": { + "terms": {"field": "f"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } + }, + "query": {"match": {"f": 42}}, + } + assert d == s.to_dict() + + s = Search(extra={"size": 5}) + assert {"size": 5} == s.to_dict() + s = s.extra(from_=42) + assert {"size": 5, "from": 42} == s.to_dict() + + +def test_complex_example() -> None: + s = Search() + s = ( + s.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .collapse("user_id") + .post_filter("terms", tags=["prague", "czech"]) + .script_fields(more_attendees="doc['attendees'].value + 42") + ) + + s.aggs.bucket("per_country", "terms", field="country").metric( + "avg_attendees", "avg", field="attendees" + ) + + s.query.minimum_should_match = 2 + + s = s.highlight_options(order="score").highlight("title", "body", fragment_size=50) + + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + 
{"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "post_filter": {"terms": {"tags": ["prague", "czech"]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "collapse": {"field": "user_id"}, + "highlight": { + "order": "score", + "fields": {"title": {"fragment_size": 50}, "body": {"fragment_size": 50}}, + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } == s.to_dict() + + +def test_reverse() -> None: + d = { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [ + { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + } + ], + } + }, + "post_filter": {"bool": {"must": [{"terms": {"tags": ["prague", "czech"]}}]}}, + "aggs": { + "per_country": { + "terms": {"field": "country"}, + "aggs": {"avg_attendees": {"avg": {"field": "attendees"}}}, + } + }, + "sort": ["title", {"category": {"order": "desc"}}, "_score"], + "size": 5, + "highlight": {"order": "score", "fields": {"title": {"fragment_size": 50}}}, + "suggest": { + "my-title-suggestions-1": { + "text": "devloping distibutd saerch engies", + "term": {"size": 3, "field": "title"}, + } + }, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } + + d2 = deepcopy(d) + + s = Search.from_dict(d) + + # make sure we haven't modified anything in place + assert d == d2 + assert {"size": 5} == s._extra + assert d == s.to_dict() + + +def test_code_generated_classes() -> None: + s = Search() + s = ( + s.query(query.Match("title", types.MatchQuery(query="python"))) + .query(~query.Match("title", types.MatchQuery(query="ruby"))) + .query( + query.Knn( + field="title", + query_vector=[1.0, 2.0, 3.0], + num_candidates=10, + k=3, + filter=query.Range("year", wrappers.Range(gt="2004")), + ) + ) + .filter( + query.Term("category", types.TermQuery(value="meetup")) + | query.Term("category", types.TermQuery(value="conference")) + ) + .collapse("user_id") + .post_filter(query.Terms(tags=["prague", "czech"])) + .script_fields(more_attendees="doc['attendees'].value + 42") + ) + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": {"value": "meetup"}}}, + {"term": {"category": {"value": "conference"}}}, + ] + } + } + ], + "must": [ + {"match": {"title": {"query": "python"}}}, + { + "knn": { + "field": "title", + "filter": [ + { + "range": { + "year": { + "gt": "2004", + }, + }, + }, + ], + "k": 3, + "num_candidates": 10, + "query_vector": [ + 1.0, + 2.0, + 3.0, + ], + }, + }, + ], + "must_not": [{"match": {"title": {"query": "ruby"}}}], + } + }, + "post_filter": {"terms": {"tags": ["prague", "czech"]}}, + "collapse": {"field": "user_id"}, + "script_fields": {"more_attendees": {"script": "doc['attendees'].value + 42"}}, + } == s.to_dict() + + +def test_from_dict_doesnt_need_query() -> None: + s = Search.from_dict({"size": 5}) + + assert {"size": 5} == s.to_dict() + + +@pytest.mark.sync +def test_params_being_passed_to_search(mock_client: Any) -> None: + s = Search(using="mock") + s = s.params(routing="42") + s.execute() + + mock_client.search.assert_called_once_with(index=None, body={}, 
routing="42") + + +def test_source() -> None: + assert {} == Search().source().to_dict() + + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]} + } == Search().source(includes=["foo.bar.*"], excludes=("foo.one",)).to_dict() + + assert {"_source": False} == Search().source(False).to_dict() + + assert {"_source": ["f1", "f2"]} == Search().source( + includes=["foo.bar.*"], excludes=["foo.one"] + ).source(["f1", "f2"]).to_dict() + + +def test_source_on_clone() -> None: + assert { + "_source": {"includes": ["foo.bar.*"], "excludes": ["foo.one"]}, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == Search().source(includes=["foo.bar.*"]).source(excludes=["foo.one"]).filter( + "term", title="python" + ).to_dict() + assert { + "_source": False, + "query": {"bool": {"filter": [{"term": {"title": "python"}}]}}, + } == Search().source(False).filter("term", title="python").to_dict() + + +def test_source_on_clear() -> None: + assert ( + {} + == Search() + .source(includes=["foo.bar.*"]) + .source(includes=None, excludes=None) + .to_dict() + ) + + +def test_suggest_accepts_global_text() -> None: + s = Search.from_dict( + { + "suggest": { + "text": "the amsterdma meetpu", + "my-suggest-1": {"term": {"field": "title"}}, + "my-suggest-2": {"text": "other", "term": {"field": "body"}}, + } + } + ) + + assert { + "suggest": { + "my-suggest-1": { + "term": {"field": "title"}, + "text": "the amsterdma meetpu", + }, + "my-suggest-2": {"term": {"field": "body"}, "text": "other"}, + } + } == s.to_dict() + + +def test_suggest() -> None: + s = Search() + s = s.suggest("my_suggestion", "pyhton", term={"field": "title"}) + + assert { + "suggest": {"my_suggestion": {"term": {"field": "title"}, "text": "pyhton"}} + } == s.to_dict() + + +def test_exclude() -> None: + s = Search() + s = s.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == s.to_dict() + + +@pytest.mark.sync +def test_delete_by_query(mock_client: Any) -> None: + s = Search(using="mock", index="i").query("match", lang="java") + s.delete() + + mock_client.delete_by_query.assert_called_once_with( + index=["i"], body={"query": {"match": {"lang": "java"}}} + ) + + +def test_update_from_dict() -> None: + s = Search() + s.update_from_dict({"indices_boost": [{"important-documents": 2}]}) + s.update_from_dict({"_source": ["id", "name"]}) + s.update_from_dict({"collapse": {"field": "user_id"}}) + + assert { + "indices_boost": [{"important-documents": 2}], + "_source": ["id", "name"], + "collapse": {"field": "user_id"}, + } == s.to_dict() + + +def test_rescore_query_to_dict() -> None: + s = Search(index="index-name") + + positive_query = Q( + "function_score", + query=Q("term", tags="a"), + script_score={"script": "_score * 1"}, + ) + + negative_query = Q( + "function_score", + query=Q("term", tags="b"), + script_score={"script": "_score * -100"}, + ) + + s = s.query(positive_query) + s = s.extra( + rescore={"window_size": 100, "query": {"rescore_query": negative_query}} + ) + assert s.to_dict() == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 100, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "b"}}, + "functions": [{"script_score": {"script": "_score * -100"}}], + } + } + }, + }, + } + + assert s.to_dict( + rescore={"window_size": 10, "query": 
{"rescore_query": positive_query}} + ) == { + "query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + }, + "rescore": { + "window_size": 10, + "query": { + "rescore_query": { + "function_score": { + "query": {"term": {"tags": "a"}}, + "functions": [{"script_score": {"script": "_score * 1"}}], + } + } + }, + }, + } + + +@pytest.mark.sync +def test_empty_search() -> None: + s = EmptySearch(index="index-name") + s = s.query("match", lang="java") + s.aggs.bucket("versions", "terms", field="version") + + assert s.count() == 0 + assert [hit for hit in s] == [] + assert [hit for hit in s.scan()] == [] + s.delete() # should not error + + +def test_suggest_completion() -> None: + s = Search() + s = s.suggest("my_suggestion", "pyhton", completion={"field": "title"}) + + assert { + "suggest": { + "my_suggestion": {"completion": {"field": "title"}, "prefix": "pyhton"} + } + } == s.to_dict() + + +def test_suggest_regex_query() -> None: + s = Search() + s = s.suggest("my_suggestion", regex="py[thon|py]", completion={"field": "title"}) + + assert { + "suggest": { + "my_suggestion": {"completion": {"field": "title"}, "regex": "py[thon|py]"} + } + } == s.to_dict() + + +def test_suggest_must_pass_text_or_regex() -> None: + s = Search() + with raises(ValueError): + s.suggest("my_suggestion") + + +def test_suggest_can_only_pass_text_or_regex() -> None: + s = Search() + with raises(ValueError): + s.suggest("my_suggestion", text="python", regex="py[hton|py]") + + +def test_suggest_regex_must_be_wtih_completion() -> None: + s = Search() + with raises(ValueError): + s.suggest("my_suggestion", regex="py[thon|py]") diff --git a/test_elasticsearch/test_dsl/_sync/test_update_by_query.py b/test_elasticsearch/test_dsl/_sync/test_update_by_query.py new file mode 100644 index 000000000..390257ffb --- /dev/null +++ b/test_elasticsearch/test_dsl/_sync/test_update_by_query.py @@ -0,0 +1,180 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from copy import deepcopy +from typing import Any + +import pytest + +from elasticsearch.dsl import Q, UpdateByQuery +from elasticsearch.dsl.response import UpdateByQueryResponse +from elasticsearch.dsl.search_base import SearchBase + + +def test_ubq_starts_with_no_query() -> None: + ubq = UpdateByQuery() + + assert ubq.query._proxied is None + + +def test_ubq_to_dict() -> None: + ubq = UpdateByQuery() + assert {} == ubq.to_dict() + + ubq = ubq.query("match", f=42) + assert {"query": {"match": {"f": 42}}} == ubq.to_dict() + + assert {"query": {"match": {"f": 42}}, "size": 10} == ubq.to_dict(size=10) + + ubq = UpdateByQuery(extra={"size": 5}) + assert {"size": 5} == ubq.to_dict() + + ubq = UpdateByQuery(extra={"extra_q": Q("term", category="conference")}) + assert {"extra_q": {"term": {"category": "conference"}}} == ubq.to_dict() + + +def test_complex_example() -> None: + ubq = UpdateByQuery() + ubq = ( + ubq.query("match", title="python") + .query(~Q("match", title="ruby")) + .filter(Q("term", category="meetup") | Q("term", category="conference")) + .script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + ) + + ubq.query.minimum_should_match = 2 + assert { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } == ubq.to_dict() + + +def test_exclude() -> None: + ubq = UpdateByQuery() + ubq = ubq.exclude("match", title="python") + + assert { + "query": { + "bool": { + "filter": [{"bool": {"must_not": [{"match": {"title": "python"}}]}}] + } + } + } == ubq.to_dict() + + +def test_reverse() -> None: + d = { + "query": { + "bool": { + "filter": [ + { + "bool": { + "should": [ + {"term": {"category": "meetup"}}, + {"term": {"category": "conference"}}, + ] + } + } + ], + "must": [ + { + "bool": { + "must": [{"match": {"title": "python"}}], + "must_not": [{"match": {"title": "ruby"}}], + "minimum_should_match": 2, + } + } + ], + } + }, + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + }, + } + + d2 = deepcopy(d) + + ubq = UpdateByQuery.from_dict(d) + + assert d == d2 + assert d == ubq.to_dict() + + +def test_from_dict_doesnt_need_query() -> None: + ubq = UpdateByQuery.from_dict({"script": {"source": "test"}}) + + assert {"script": {"source": "test"}} == ubq.to_dict() + + +@pytest.mark.sync +def test_params_being_passed_to_search(mock_client: Any) -> None: + ubq = UpdateByQuery(using="mock", index="i") + ubq = ubq.params(routing="42") + ubq.execute() + + mock_client.update_by_query.assert_called_once_with(index=["i"], routing="42") + + +def test_overwrite_script() -> None: + ubq = UpdateByQuery() + ubq = ubq.script( + source="ctx._source.likes += params.f", lang="painless", params={"f": 3} + ) + assert { + "script": { + "source": "ctx._source.likes += params.f", + "lang": "painless", + "params": {"f": 3}, + } + } == ubq.to_dict() + ubq = ubq.script(source="ctx._source.likes++") + assert {"script": {"source": "ctx._source.likes++"}} == ubq.to_dict() + + +def test_update_by_query_response_success() -> None: + ubqr = UpdateByQueryResponse(SearchBase(), {"timed_out": False, "failures": []}) + assert ubqr.success() + + ubqr = UpdateByQueryResponse(SearchBase(), {"timed_out": 
True, "failures": []}) + assert not ubqr.success() + + ubqr = UpdateByQueryResponse(SearchBase(), {"timed_out": False, "failures": [{}]}) + assert not ubqr.success() diff --git a/test_elasticsearch/test_dsl/async_sleep.py b/test_elasticsearch/test_dsl/async_sleep.py new file mode 100644 index 000000000..ce5ced1c5 --- /dev/null +++ b/test_elasticsearch/test_dsl/async_sleep.py @@ -0,0 +1,24 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import asyncio +from typing import Union + + +async def sleep(secs: Union[int, float]) -> None: + """Tests can use this function to sleep.""" + await asyncio.sleep(secs) diff --git a/test_elasticsearch/test_dsl/conftest.py b/test_elasticsearch/test_dsl/conftest.py new file mode 100644 index 000000000..f1d865761 --- /dev/null +++ b/test_elasticsearch/test_dsl/conftest.py @@ -0,0 +1,486 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +import asyncio +import os +import re +import time +from datetime import datetime +from typing import Any, AsyncGenerator, Dict, Generator, Tuple, cast +from unittest import SkipTest, TestCase +from unittest.mock import AsyncMock, Mock + +import pytest_asyncio +from elastic_transport import ObjectApiResponse +from elasticsearch import AsyncElasticsearch, Elasticsearch +from elasticsearch.exceptions import ConnectionError +from elasticsearch.helpers import bulk +from pytest import fixture, skip + +from elasticsearch.dsl import Search +from elasticsearch.dsl.async_connections import add_connection as add_async_connection +from elasticsearch.dsl.async_connections import connections as async_connections +from elasticsearch.dsl.connections import add_connection, connections + +from .test_integration._async import test_document as async_document +from .test_integration._sync import test_document as sync_document +from .test_integration.test_data import ( + DATA, + FLAT_DATA, + TEST_GIT_DATA, + create_flat_git_index, + create_git_index, +) + +if "ELASTICSEARCH_URL" in os.environ: + ELASTICSEARCH_URL = os.environ["ELASTICSEARCH_URL"] +else: + ELASTICSEARCH_URL = "http://localhost:9200" + + +def get_test_client(wait: bool = True, **kwargs: Any) -> Elasticsearch: + # construct kwargs from the environment + kw: Dict[str, Any] = {"request_timeout": 30} + + if "PYTHON_CONNECTION_CLASS" in os.environ: + kw["node_class"] = os.environ["PYTHON_CONNECTION_CLASS"] + + kw.update(kwargs) + client = Elasticsearch(ELASTICSEARCH_URL, **kw) + + # wait for yellow status + for tries_left in range(100 if wait else 1, 0, -1): + try: + client.cluster.health(wait_for_status="yellow") + return client + except ConnectionError: + if wait and tries_left == 1: + raise + time.sleep(0.1) + + raise SkipTest("Elasticsearch failed to start.") + + +async def get_async_test_client(wait: bool = True, **kwargs: Any) -> AsyncElasticsearch: + # construct kwargs from the environment + kw: Dict[str, Any] = {"request_timeout": 30} + + if "PYTHON_CONNECTION_CLASS" in os.environ: + kw["node_class"] = os.environ["PYTHON_CONNECTION_CLASS"] + + kw.update(kwargs) + client = AsyncElasticsearch(ELASTICSEARCH_URL, **kw) + + # wait for yellow status + for tries_left in range(100 if wait else 1, 0, -1): + try: + await client.cluster.health(wait_for_status="yellow") + return client + except ConnectionError: + if wait and tries_left == 1: + raise + await asyncio.sleep(0.1) + + await client.close() + raise SkipTest("Elasticsearch failed to start.") + + +class ElasticsearchTestCase(TestCase): + client: Elasticsearch + + @staticmethod + def _get_client() -> Elasticsearch: + return get_test_client() + + @classmethod + def setup_class(cls) -> None: + cls.client = cls._get_client() + + def teardown_method(self, _: Any) -> None: + # Hidden indices expanded in wildcards in ES 7.7 + expand_wildcards = ["open", "closed"] + if self.es_version() >= (7, 7): + expand_wildcards.append("hidden") + + self.client.indices.delete_data_stream( + name="*", expand_wildcards=expand_wildcards + ) + self.client.indices.delete(index="*", expand_wildcards=expand_wildcards) + self.client.indices.delete_template(name="*") + self.client.indices.delete_index_template(name="*") + + def es_version(self) -> Tuple[int, ...]: + if not hasattr(self, "_es_version"): + self._es_version = _get_version(self.client.info()["version"]["number"]) + return self._es_version + + +def _get_version(version_string: str) -> Tuple[int, ...]: + if "." 
not in version_string: + return () + version = version_string.strip().split(".") + return tuple(int(v) if v.isdigit() else 999 for v in version) + + +@fixture(scope="session") +def client() -> Elasticsearch: + try: + connection = get_test_client(wait="WAIT_FOR_ES" in os.environ) + add_connection("default", connection) + return connection + except SkipTest: + skip() + + +@pytest_asyncio.fixture +async def async_client() -> AsyncGenerator[AsyncElasticsearch, None]: + try: + connection = await get_async_test_client(wait="WAIT_FOR_ES" in os.environ) + add_async_connection("default", connection) + yield connection + await connection.close() + except SkipTest: + skip() + + +@fixture(scope="session") +def es_version(client: Elasticsearch) -> Generator[Tuple[int, ...], None, None]: + info = client.info() + yield tuple( + int(x) + for x in re.match(r"^([0-9.]+)", info["version"]["number"]).group(1).split(".") # type: ignore + ) + + +@fixture +def write_client(client: Elasticsearch) -> Generator[Elasticsearch, None, None]: + yield client + for index_name in client.indices.get(index="test-*", expand_wildcards="all"): + client.indices.delete(index=index_name) + client.options(ignore_status=404).indices.delete_template(name="test-template") + client.options(ignore_status=404).indices.delete_index_template( + name="test-template" + ) + + +@pytest_asyncio.fixture +async def async_write_client( + write_client: Elasticsearch, async_client: AsyncElasticsearch +) -> AsyncGenerator[AsyncElasticsearch, None]: + yield async_client + + +@fixture +def mock_client( + dummy_response: ObjectApiResponse[Any], +) -> Generator[Elasticsearch, None, None]: + client = Mock() + client.search.return_value = dummy_response + client.update_by_query.return_value = dummy_response + add_connection("mock", client) + + yield client + connections._conns = {} + connections._kwargs = {} + + +@fixture +def async_mock_client( + dummy_response: ObjectApiResponse[Any], +) -> Generator[Elasticsearch, None, None]: + client = Mock() + client.search = AsyncMock(return_value=dummy_response) + client.indices = AsyncMock() + client.update_by_query = AsyncMock() + client.delete_by_query = AsyncMock() + add_async_connection("mock", client) + + yield client + async_connections._conns = {} + async_connections._kwargs = {} + + +@fixture(scope="session") +def data_client(client: Elasticsearch) -> Generator[Elasticsearch, None, None]: + # create mappings + create_git_index(client, "git") + create_flat_git_index(client, "flat-git") + # load data + bulk(client, DATA, raise_on_error=True, refresh=True) + bulk(client, FLAT_DATA, raise_on_error=True, refresh=True) + yield client + client.indices.delete(index="git") + client.indices.delete(index="flat-git") + + +@pytest_asyncio.fixture +async def async_data_client( + data_client: Elasticsearch, async_client: AsyncElasticsearch +) -> AsyncGenerator[AsyncElasticsearch, None]: + yield async_client + + +@fixture +def dummy_response() -> ObjectApiResponse[Any]: + return ObjectApiResponse( + meta=None, + body={ + "_shards": {"failed": 0, "successful": 10, "total": 10}, + "hits": { + "hits": [ + { + "_index": "test-index", + "_type": "company", + "_id": "elasticsearch", + "_score": 12.0, + "_source": {"city": "Amsterdam", "name": "Elasticsearch"}, + }, + { + "_index": "test-index", + "_type": "employee", + "_id": "42", + "_score": 11.123, + "_routing": "elasticsearch", + "_source": { + "name": {"first": "Shay", "last": "Bannon"}, + "lang": "java", + "twitter": "kimchy", + }, + }, + { + "_index": "test-index", + 
"_type": "employee", + "_id": "47", + "_score": 1, + "_routing": "elasticsearch", + "_source": { + "name": {"first": "Honza", "last": "Král"}, + "lang": "python", + "twitter": "honzakral", + }, + }, + { + "_index": "test-index", + "_type": "employee", + "_id": "53", + "_score": 16.0, + "_routing": "elasticsearch", + }, + ], + "max_score": 12.0, + "total": 123, + }, + "timed_out": False, + "took": 123, + }, + ) + + +@fixture +def aggs_search() -> Search: + s = Search(index="flat-git") + s.aggs.bucket("popular_files", "terms", field="files", size=2).metric( + "line_stats", "stats", field="stats.lines" + ).metric("top_commits", "top_hits", size=2, _source=["stats.*", "committed_date"]) + s.aggs.bucket( + "per_month", "date_histogram", interval="month", field="info.committed_date" + ) + s.aggs.metric("sum_lines", "sum", field="stats.lines") + return s + + +@fixture +def aggs_data() -> Dict[str, Any]: + return { + "took": 4, + "timed_out": False, + "_shards": {"total": 1, "successful": 1, "failed": 0}, + "hits": {"total": 52, "hits": [], "max_score": 0.0}, + "aggregations": { + "sum_lines": {"value": 25052.0}, + "per_month": { + "buckets": [ + { + "doc_count": 38, + "key": 1393632000000, + "key_as_string": "2014-03-01T00:00:00.000Z", + }, + { + "doc_count": 11, + "key": 1396310400000, + "key_as_string": "2014-04-01T00:00:00.000Z", + }, + { + "doc_count": 3, + "key": 1398902400000, + "key_as_string": "2014-05-01T00:00:00.000Z", + }, + ] + }, + "popular_files": { + "buckets": [ + { + "key": "elasticsearch_dsl", + "line_stats": { + "count": 40, + "max": 228.0, + "min": 2.0, + "sum": 2151.0, + "avg": 53.775, + }, + "doc_count": 40, + "top_commits": { + "hits": { + "total": 40, + "hits": [ + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "_type": "doc", + "_source": { + "stats": { + "files": 4, + "deletions": 7, + "lines": 30, + "insertions": 23, + }, + "committed_date": "2014-05-02T13:47:19", + }, + "_score": 1.0, + "_index": "flat-git", + }, + { + "_id": "eb3e543323f189fd7b698e66295427204fff5755", + "_type": "doc", + "_source": { + "stats": { + "files": 1, + "deletions": 0, + "lines": 18, + "insertions": 18, + }, + "committed_date": "2014-05-01T13:32:14", + }, + "_score": 1.0, + "_index": "flat-git", + }, + ], + "max_score": 1.0, + } + }, + }, + { + "key": "test_elasticsearch_dsl", + "line_stats": { + "count": 35, + "max": 228.0, + "min": 2.0, + "sum": 1939.0, + "avg": 55.4, + }, + "doc_count": 35, + "top_commits": { + "hits": { + "total": 35, + "hits": [ + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "_type": "doc", + "_source": { + "stats": { + "files": 4, + "deletions": 7, + "lines": 30, + "insertions": 23, + }, + "committed_date": "2014-05-02T13:47:19", + }, + "_score": 1.0, + "_index": "flat-git", + }, + { + "_id": "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157", + "_type": "doc", + "_source": { + "stats": { + "files": 3, + "deletions": 18, + "lines": 62, + "insertions": 44, + }, + "committed_date": "2014-05-01T13:30:44", + }, + "_score": 1.0, + "_index": "flat-git", + }, + ], + "max_score": 1.0, + } + }, + }, + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 120, + }, + }, + } + + +def make_pr(pr_module: Any) -> Any: + return pr_module.PullRequest( + _id=42, + comments=[ + pr_module.Comment( + content="Hello World!", + author=pr_module.User(name="honzakral"), + created_at=datetime(2018, 1, 9, 10, 17, 3, 21184), + history=[ + pr_module.History( + timestamp=datetime(2012, 1, 1), + diff="-Ahoj Svete!\n+Hello World!", + ) + ], + ), + ], + 
created_at=datetime(2018, 1, 9, 9, 17, 3, 21184), + ) + + +@fixture +def pull_request(write_client: Elasticsearch) -> sync_document.PullRequest: + sync_document.PullRequest.init() + pr = cast(sync_document.PullRequest, make_pr(sync_document)) + pr.save(refresh=True) + return pr + + +@pytest_asyncio.fixture +async def async_pull_request( + async_write_client: AsyncElasticsearch, +) -> async_document.PullRequest: + await async_document.PullRequest.init() + pr = cast(async_document.PullRequest, make_pr(async_document)) + await pr.save(refresh=True) + return pr + + +@fixture +def setup_ubq_tests(client: Elasticsearch) -> str: + index = "test-git" + create_git_index(client, index) + bulk(client, TEST_GIT_DATA, raise_on_error=True, refresh=True) + return index diff --git a/test_elasticsearch/test_dsl/sleep.py b/test_elasticsearch/test_dsl/sleep.py new file mode 100644 index 000000000..83009566e --- /dev/null +++ b/test_elasticsearch/test_dsl/sleep.py @@ -0,0 +1,24 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import time +from typing import Union + + +def sleep(secs: Union[int, float]) -> None: + """Tests can use this function to sleep.""" + time.sleep(secs) diff --git a/test_elasticsearch/test_dsl/test_aggs.py b/test_elasticsearch/test_dsl/test_aggs.py new file mode 100644 index 000000000..f1dc10aa5 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_aggs.py @@ -0,0 +1,530 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
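The test_aggs.py suite that follows exercises the aggs.A factory and the bucket()/metric() chaining API. A minimal self-contained sketch of the pattern, runnable without a cluster since only dictionary serialization is involved (the field names used here are illustrative, not taken from the test data):

from elasticsearch.dsl import aggs

# a terms bucket with a nested max metric; metric() returns the parent
# aggregation so calls can be chained
a = aggs.A("terms", field="tags")
a.metric("max_score", "max", field="score")

assert a.to_dict() == {
    "terms": {"field": "tags"},
    "aggs": {"max_score": {"max": {"field": "score"}}},
}

# the same aggregation can be reconstructed from its dict form
assert aggs.A(a.to_dict()) == a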
+ +from pytest import raises + +from elasticsearch.dsl import aggs, query, types + + +def test_repr() -> None: + max_score = aggs.Max(field="score") + a = aggs.A("terms", field="tags", aggs={"max_score": max_score}) + + assert "Terms(aggs={'max_score': Max(field='score')}, field='tags')" == repr(a) + + +def test_meta() -> None: + max_score = aggs.Max(field="score") + a = aggs.A( + "terms", field="tags", aggs={"max_score": max_score}, meta={"some": "metadata"} + ) + + assert { + "terms": {"field": "tags"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + "meta": {"some": "metadata"}, + } == a.to_dict() + + +def test_meta_from_dict() -> None: + max_score = aggs.Max(field="score") + a = aggs.A( + "terms", field="tags", aggs={"max_score": max_score}, meta={"some": "metadata"} + ) + + assert aggs.A(a.to_dict()) == a + + +def test_A_creates_proper_agg() -> None: + a = aggs.A("terms", field="tags") + + assert isinstance(a, aggs.Terms) + assert a._params == {"field": "tags"} + + +def test_A_handles_nested_aggs_properly() -> None: + max_score = aggs.Max(field="score") + a = aggs.A("terms", field="tags", aggs={"max_score": max_score}) + + assert isinstance(a, aggs.Terms) + assert a._params == {"field": "tags", "aggs": {"max_score": max_score}} + + +def test_A_passes_aggs_through() -> None: + a = aggs.A("terms", field="tags") + assert aggs.A(a) is a + + +def test_A_from_dict() -> None: + d = { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } + a = aggs.A(d) + + assert isinstance(a, aggs.Terms) + assert a._params == { + "field": "tags", + "aggs": {"per_author": aggs.A("terms", field="author.raw")}, + } + assert a["per_author"] == aggs.A("terms", field="author.raw") + assert a.aggs.per_author == aggs.A("terms", field="author.raw") # type: ignore[attr-defined] + + +def test_A_fails_with_incorrect_dict() -> None: + correct_d = { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } + + with raises(Exception): + aggs.A(correct_d, field="f") + + d = correct_d.copy() + del d["terms"] + with raises(Exception): + aggs.A(d) + + d = correct_d.copy() + d["xx"] = {} + with raises(Exception): + aggs.A(d) + + +def test_A_fails_with_agg_and_params() -> None: + a = aggs.A("terms", field="tags") + + with raises(Exception): + aggs.A(a, field="score") + + +def test_buckets_are_nestable() -> None: + a = aggs.Terms(field="tags") + b = a.bucket("per_author", "terms", field="author.raw") + + assert isinstance(b, aggs.Terms) + assert b._params == {"field": "author.raw"} + assert a.aggs == {"per_author": b} + + +def test_metric_inside_buckets() -> None: + a = aggs.Terms(field="tags") + b = a.metric("max_score", "max", field="score") + + # returns bucket so it's chainable + assert a is b + assert a.aggs["max_score"] == aggs.Max(field="score") + + +def test_buckets_equals_counts_subaggs() -> None: + a = aggs.Terms(field="tags") + a.bucket("per_author", "terms", field="author.raw") + b = aggs.Terms(field="tags") + + assert a != b + + +def test_buckets_to_dict() -> None: + a = aggs.Terms(field="tags") + a.bucket("per_author", "terms", field="author.raw") + + assert { + "terms": {"field": "tags"}, + "aggs": {"per_author": {"terms": {"field": "author.raw"}}}, + } == a.to_dict() + + a = aggs.Terms(field="tags") + a.metric("max_score", "max", field="score") + + assert { + "terms": {"field": "tags"}, + "aggs": {"max_score": {"max": {"field": "score"}}}, + } == a.to_dict() + + +def test_nested_buckets_are_reachable_as_getitem() -> None: + a 
= aggs.Terms(field="tags") + b = a.bucket("per_author", "terms", field="author.raw") + + assert a["per_author"] is not b + assert a["per_author"] == b + + +def test_nested_buckets_are_settable_as_getitem() -> None: + a = aggs.Terms(field="tags") + b = a["per_author"] = aggs.A("terms", field="author.raw") + + assert a.aggs["per_author"] is b + + +def test_filter_can_be_instantiated_using_positional_args() -> None: + a = aggs.Filter(query.Q("term", f=42)) + + assert {"filter": {"term": {"f": 42}}} == a.to_dict() + + assert a == aggs.A("filter", query.Q("term", f=42)) + + +def test_filter_aggregation_as_nested_agg() -> None: + a = aggs.Terms(field="tags") + a.bucket("filtered", "filter", query.Q("term", f=42)) + + assert { + "terms": {"field": "tags"}, + "aggs": {"filtered": {"filter": {"term": {"f": 42}}}}, + } == a.to_dict() + + +def test_filter_aggregation_with_nested_aggs() -> None: + a = aggs.Filter(query.Q("term", f=42)) + a.bucket("testing", "terms", field="tags") + + assert { + "filter": {"term": {"f": 42}}, + "aggs": {"testing": {"terms": {"field": "tags"}}}, + } == a.to_dict() + + +def test_filters_correctly_identifies_the_hash() -> None: + a = aggs.A( + "filters", + filters={ + "group_a": {"term": {"group": "a"}}, + "group_b": {"term": {"group": "b"}}, + }, + ) + + assert { + "filters": { + "filters": { + "group_a": {"term": {"group": "a"}}, + "group_b": {"term": {"group": "b"}}, + } + } + } == a.to_dict() + assert a.filters.group_a == query.Q("term", group="a") + + +def test_bucket_sort_agg() -> None: + # test the dictionary (type ignored) and fully typed alterantives + bucket_sort_agg = aggs.BucketSort(sort=[{"total_sales": {"order": "desc"}}], size=3) # type: ignore + assert bucket_sort_agg.to_dict() == { + "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3} + } + bucket_sort_agg = aggs.BucketSort( + sort=[types.SortOptions("total_sales", types.FieldSort(order="desc"))], size=3 + ) + assert bucket_sort_agg.to_dict() == { + "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3} + } + + a = aggs.DateHistogram(field="date", interval="month") + a.bucket("total_sales", "sum", field="price") + a.bucket( + "sales_bucket_sort", + "bucket_sort", + sort=[{"total_sales": {"order": "desc"}}], + size=3, + ) + assert { + "date_histogram": {"field": "date", "interval": "month"}, + "aggs": { + "total_sales": {"sum": {"field": "price"}}, + "sales_bucket_sort": { + "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3} + }, + }, + } == a.to_dict() + + +def test_bucket_sort_agg_only_trnunc() -> None: + # test the dictionary (type ignored) and fully typed alterantives + bucket_sort_agg = aggs.BucketSort(**{"from": 1, "size": 1, "_expand__to_dot": False}) # type: ignore + assert bucket_sort_agg.to_dict() == {"bucket_sort": {"from": 1, "size": 1}} + bucket_sort_agg = aggs.BucketSort(from_=1, size=1, _expand__to_dot=False) + assert bucket_sort_agg.to_dict() == {"bucket_sort": {"from": 1, "size": 1}} + + a = aggs.DateHistogram(field="date", interval="month") + a.bucket("bucket_truncate", "bucket_sort", **{"from": 1, "size": 1}) + assert { + "date_histogram": {"field": "date", "interval": "month"}, + "aggs": {"bucket_truncate": {"bucket_sort": {"from": 1, "size": 1}}}, + } == a.to_dict() + + +def test_geohash_grid_aggregation() -> None: + # test the dictionary (type ignored) and fully typed alterantives + a = aggs.GeohashGrid(**{"field": "centroid", "precision": 3}) # type: ignore + assert {"geohash_grid": {"field": "centroid", "precision": 3}} == 
a.to_dict() + a = aggs.GeohashGrid(field="centroid", precision=3) + assert {"geohash_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + + +def test_geohex_grid_aggregation() -> None: + # test the dictionary (type ignored) and fully typed alterantives + a = aggs.GeohexGrid(**{"field": "centroid", "precision": 3}) # type: ignore + assert {"geohex_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + a = aggs.GeohexGrid(field="centroid", precision=3) + assert {"geohex_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + + +def test_geotile_grid_aggregation() -> None: + # test the dictionary (type ignored) and fully typed alterantives + a = aggs.GeotileGrid(**{"field": "centroid", "precision": 3}) # type: ignore + assert {"geotile_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + a = aggs.GeotileGrid(field="centroid", precision=3) + assert {"geotile_grid": {"field": "centroid", "precision": 3}} == a.to_dict() + + +def test_boxplot_aggregation() -> None: + a = aggs.Boxplot(field="load_time") + + assert {"boxplot": {"field": "load_time"}} == a.to_dict() + + +def test_rare_terms_aggregation() -> None: + a = aggs.RareTerms(field="the-field") + a.bucket("total_sales", "sum", field="price") + a.bucket( + "sales_bucket_sort", + "bucket_sort", + sort=[{"total_sales": {"order": "desc"}}], + size=3, + ) + + assert { + "aggs": { + "sales_bucket_sort": { + "bucket_sort": {"size": 3, "sort": [{"total_sales": {"order": "desc"}}]} + }, + "total_sales": {"sum": {"field": "price"}}, + }, + "rare_terms": {"field": "the-field"}, + } == a.to_dict() + + +def test_variable_width_histogram_aggregation() -> None: + a = aggs.VariableWidthHistogram(field="price", buckets=2) + assert {"variable_width_histogram": {"buckets": 2, "field": "price"}} == a.to_dict() + + +def test_ip_prefix_aggregation() -> None: + # test the dictionary (type ignored) and fully typed alterantives + a = aggs.IPPrefix(**{"field": "ipv4", "prefix_length": 24}) # type: ignore + assert {"ip_prefix": {"field": "ipv4", "prefix_length": 24}} == a.to_dict() + a = aggs.IPPrefix(field="ipv4", prefix_length=24) + assert {"ip_prefix": {"field": "ipv4", "prefix_length": 24}} == a.to_dict() + + +def test_ip_prefix_aggregation_extra() -> None: + a = aggs.IPPrefix(field="ipv6", prefix_length=64, is_ipv6=True) + + assert { + "ip_prefix": { + "field": "ipv6", + "prefix_length": 64, + "is_ipv6": True, + }, + } == a.to_dict() + + +def test_multi_terms_aggregation() -> None: + a = aggs.MultiTerms(terms=[{"field": "tags"}, {"field": "author.row"}]) + assert { + "multi_terms": { + "terms": [ + {"field": "tags"}, + {"field": "author.row"}, + ] + } + } == a.to_dict() + a = aggs.MultiTerms( + terms=[ + types.MultiTermLookup(field="tags"), + types.MultiTermLookup(field="author.row"), + ] + ) + assert { + "multi_terms": { + "terms": [ + {"field": "tags"}, + {"field": "author.row"}, + ] + } + } == a.to_dict() + + +def test_categorize_text_aggregation() -> None: + a = aggs.CategorizeText( + field="tags", + categorization_filters=["\\w+\\_\\d{3}"], + max_matched_tokens=2, + similarity_threshold=30, + ) + assert { + "categorize_text": { + "field": "tags", + "categorization_filters": ["\\w+\\_\\d{3}"], + "max_matched_tokens": 2, + "similarity_threshold": 30, + } + } == a.to_dict() + + +def test_median_absolute_deviation_aggregation() -> None: + a = aggs.MedianAbsoluteDeviation(field="rating") + + assert {"median_absolute_deviation": {"field": "rating"}} == a.to_dict() + + +def test_t_test_aggregation() -> None: + a = aggs.TTest( + 
a={"field": "startup_time_before"}, + b={"field": "startup_time_after"}, + type="paired", + ) + + assert { + "t_test": { + "a": {"field": "startup_time_before"}, + "b": {"field": "startup_time_after"}, + "type": "paired", + } + } == a.to_dict() + + +def test_geo_line_aggregation() -> None: + a = aggs.GeoLine(point={"field": "centroid"}, sort={"field": "date"}) + + assert { + "geo_line": { + "point": {"field": "centroid"}, + "sort": {"field": "date"}, + }, + } == a.to_dict() + + +def test_inference_aggregation() -> None: + a = aggs.Inference(model_id="model-id", buckets_path={"agg_name": "agg_name"}) + assert { + "inference": {"buckets_path": {"agg_name": "agg_name"}, "model_id": "model-id"} + } == a.to_dict() + + +def test_matrix_stats_aggregation() -> None: + a = aggs.MatrixStats(fields=["poverty", "income"]) + + assert {"matrix_stats": {"fields": ["poverty", "income"]}} == a.to_dict() + + +def test_moving_percentiles_aggregation() -> None: + a = aggs.DateHistogram() + a.bucket("the_percentile", "percentiles", field="price", percents=[1.0, 99.0]) + a.pipeline( + "the_movperc", "moving_percentiles", buckets_path="the_percentile", window=10 + ) + + assert { + "aggs": { + "the_movperc": { + "moving_percentiles": {"buckets_path": "the_percentile", "window": 10} + }, + "the_percentile": { + "percentiles": {"field": "price", "percents": [1.0, 99.0]} + }, + }, + "date_histogram": {}, + } == a.to_dict() + + +def test_normalize_aggregation() -> None: + a = aggs.Normalize(buckets_path="normalized", method="percent_of_sum") + assert { + "normalize": {"buckets_path": "normalized", "method": "percent_of_sum"} + } == a.to_dict() + + +def test_random_sampler_aggregation() -> None: + a = aggs.RandomSampler(probability=0.1).metric( + "price_percentiles", + "percentiles", + field="price", + ) + + assert { + "random_sampler": { + "probability": 0.1, + }, + "aggs": { + "price_percentiles": { + "percentiles": {"field": "price"}, + }, + }, + } == a.to_dict() + + +def test_adjancecy_matrix_aggregation() -> None: + a = aggs.AdjacencyMatrix(filters={"grpA": {"terms": {"accounts": ["hillary", "sidney"]}}, "grpB": {"terms": {"accounts": ["donald", "mitt"]}}, "grpC": {"terms": {"accounts": ["vladimir", "nigel"]}}}) # type: ignore + assert { + "adjacency_matrix": { + "filters": { + "grpA": {"terms": {"accounts": ["hillary", "sidney"]}}, + "grpB": {"terms": {"accounts": ["donald", "mitt"]}}, + "grpC": {"terms": {"accounts": ["vladimir", "nigel"]}}, + } + } + } == a.to_dict() + a = aggs.AdjacencyMatrix( + filters={ + "grpA": query.Terms(accounts=["hillary", "sidney"]), + "grpB": query.Terms(accounts=["donald", "mitt"]), + "grpC": query.Terms(accounts=["vladimir", "nigel"]), + } + ) + assert { + "adjacency_matrix": { + "filters": { + "grpA": {"terms": {"accounts": ["hillary", "sidney"]}}, + "grpB": {"terms": {"accounts": ["donald", "mitt"]}}, + "grpC": {"terms": {"accounts": ["vladimir", "nigel"]}}, + } + } + } == a.to_dict() + + +def test_top_metrics_aggregation() -> None: + # test the dictionary (type ignored) and fully typed alterantives + a = aggs.TopMetrics(metrics={"field": "m"}, sort={"s": "desc"}) # type: ignore + assert { + "top_metrics": {"metrics": {"field": "m"}, "sort": {"s": "desc"}} + } == a.to_dict() + a = aggs.TopMetrics( + metrics=types.TopMetricsValue(field="m"), + sort=types.SortOptions("s", types.FieldSort(order="desc")), + ) + assert { + "top_metrics": {"metrics": {"field": "m"}, "sort": {"s": {"order": "desc"}}} + } == a.to_dict() + + +def test_bucket_agg_with_filter() -> None: + b = 
aggs.Filter(query.Terms(something=[1, 2, 3])) + + a = aggs.Terms(field="some_field", size=100) + a.bucket("b", b) + + assert a.aggs["b"] == a["b"] # a['b'] threw exception before patch #1902 diff --git a/test_elasticsearch/test_dsl/test_analysis.py b/test_elasticsearch/test_dsl/test_analysis.py new file mode 100644 index 000000000..47a08672d --- /dev/null +++ b/test_elasticsearch/test_dsl/test_analysis.py @@ -0,0 +1,216 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import raises + +from elasticsearch.dsl import analysis + + +def test_analyzer_serializes_as_name() -> None: + a = analysis.analyzer("my_analyzer") + + assert "my_analyzer" == a.to_dict() # type: ignore + + +def test_analyzer_has_definition() -> None: + a = analysis.CustomAnalyzer( + "my_analyzer", tokenizer="keyword", filter=["lowercase"] + ) + + assert { + "type": "custom", + "tokenizer": "keyword", + "filter": ["lowercase"], + } == a.get_definition() + + +def test_simple_multiplexer_filter() -> None: + a = analysis.analyzer( + "my_analyzer", + tokenizer="keyword", + filter=[ + analysis.token_filter( + "my_multi", "multiplexer", filters=["lowercase", "lowercase, stop"] + ) + ], + ) + + assert { + "analyzer": { + "my_analyzer": { + "filter": ["my_multi"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "my_multi": { + "filters": ["lowercase", "lowercase, stop"], + "type": "multiplexer", + } + }, + } == a.get_analysis_definition() + + +def test_multiplexer_with_custom_filter() -> None: + a = analysis.analyzer( + "my_analyzer", + tokenizer="keyword", + filter=[ + analysis.token_filter( + "my_multi", + "multiplexer", + filters=[ + [analysis.token_filter("en", "snowball", language="English")], + "lowercase, stop", + ], + ) + ], + ) + + assert { + "analyzer": { + "my_analyzer": { + "filter": ["my_multi"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "en": {"type": "snowball", "language": "English"}, + "my_multi": {"filters": ["en", "lowercase, stop"], "type": "multiplexer"}, + }, + } == a.get_analysis_definition() + + +def test_conditional_token_filter() -> None: + a = analysis.analyzer( + "my_cond", + tokenizer=analysis.tokenizer("keyword"), + filter=[ + analysis.token_filter( + "testing", + "condition", + script={"source": "return true"}, + filter=[ + "lowercase", + analysis.token_filter("en", "snowball", language="English"), + ], + ), + "stop", + ], + ) + + assert { + "analyzer": { + "my_cond": { + "filter": ["testing", "stop"], + "tokenizer": "keyword", + "type": "custom", + } + }, + "filter": { + "en": {"language": "English", "type": "snowball"}, + "testing": { + "script": {"source": "return true"}, + "filter": ["lowercase", "en"], + "type": "condition", + }, + }, + } == a.get_analysis_definition() + + +def 
test_conflicting_nested_filters_cause_error() -> None: + a = analysis.analyzer( + "my_cond", + tokenizer=analysis.tokenizer("keyword"), + filter=[ + analysis.token_filter("en", "stemmer", language="english"), + analysis.token_filter( + "testing", + "condition", + script={"source": "return true"}, + filter=[ + "lowercase", + analysis.token_filter("en", "snowball", language="English"), + ], + ), + ], + ) + + with raises(ValueError): + a.get_analysis_definition() + + +def test_normalizer_serializes_as_name() -> None: + n = analysis.normalizer("my_normalizer") + + assert "my_normalizer" == n.to_dict() # type: ignore + + +def test_normalizer_has_definition() -> None: + n = analysis.CustomNormalizer( + "my_normalizer", filter=["lowercase", "asciifolding"], char_filter=["quote"] + ) + + assert { + "type": "custom", + "filter": ["lowercase", "asciifolding"], + "char_filter": ["quote"], + } == n.get_definition() + + +def test_tokenizer() -> None: + t = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3) + + assert t.to_dict() == "trigram" # type: ignore + assert {"type": "nGram", "min_gram": 3, "max_gram": 3} == t.get_definition() + + +def test_custom_analyzer_can_collect_custom_items() -> None: + trigram = analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3) + my_stop = analysis.token_filter("my_stop", "stop", stopwords=["a", "b"]) + umlauts = analysis.char_filter("umlauts", "pattern_replace", mappings=["ü=>ue"]) + a = analysis.analyzer( + "my_analyzer", + tokenizer=trigram, + filter=["lowercase", my_stop], + char_filter=["html_strip", umlauts], + ) + + assert a.to_dict() == "my_analyzer" # type: ignore + assert { + "analyzer": { + "my_analyzer": { + "type": "custom", + "tokenizer": "trigram", + "filter": ["lowercase", "my_stop"], + "char_filter": ["html_strip", "umlauts"], + } + }, + "tokenizer": {"trigram": trigram.get_definition()}, + "filter": {"my_stop": my_stop.get_definition()}, + "char_filter": {"umlauts": umlauts.get_definition()}, + } == a.get_analysis_definition() + + +def test_stemmer_analyzer_can_pass_name() -> None: + t = analysis.token_filter( + "my_english_filter", name="minimal_english", type="stemmer" + ) + assert t.to_dict() == "my_english_filter" # type: ignore + assert {"type": "stemmer", "name": "minimal_english"} == t.get_definition() diff --git a/test_elasticsearch/test_dsl/test_connections.py b/test_elasticsearch/test_dsl/test_connections.py new file mode 100644 index 000000000..96706d298 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_connections.py @@ -0,0 +1,143 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
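The connection tests below construct private Connections registries; application code typically goes through the shared registry exported by elasticsearch.dsl.connections. A rough usage sketch under that assumption (the host URL is a placeholder):

from elasticsearch.dsl.connections import connections

# store the settings only; the client is created on first use
connections.configure(default={"hosts": ["http://localhost:9200"]})

# fetches (and, on the first call, builds) the client for the "default" alias
client = connections.get_connection()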
+ +from typing import Any, List + +from elasticsearch import Elasticsearch +from pytest import raises + +from elasticsearch.dsl import connections, serializer + + +class DummyElasticsearch: + def __init__(self, *args: Any, hosts: List[str], **kwargs: Any): + self.hosts = hosts + + +def test_default_connection_is_returned_by_default() -> None: + c = connections.Connections[object](elasticsearch_class=object) + + con, con2 = object(), object() + c.add_connection("default", con) + + c.add_connection("not-default", con2) + + assert c.get_connection() is con + + +def test_get_connection_created_connection_if_needed() -> None: + c = connections.Connections[DummyElasticsearch]( + elasticsearch_class=DummyElasticsearch + ) + c.configure( + default={"hosts": ["https://es.com:9200"]}, + local={"hosts": ["https://localhost:9200"]}, + ) + + default = c.get_connection() + local = c.get_connection("local") + + assert isinstance(default, DummyElasticsearch) + assert isinstance(local, DummyElasticsearch) + + assert default.hosts == ["https://es.com:9200"] + assert local.hosts == ["https://localhost:9200"] + + +def test_configure_preserves_unchanged_connections() -> None: + c = connections.Connections[DummyElasticsearch]( + elasticsearch_class=DummyElasticsearch + ) + + c.configure( + default={"hosts": ["https://es.com:9200"]}, + local={"hosts": ["https://localhost:9200"]}, + ) + default = c.get_connection() + local = c.get_connection("local") + + c.configure( + default={"hosts": ["https://not-es.com:9200"]}, + local={"hosts": ["https://localhost:9200"]}, + ) + new_default = c.get_connection() + new_local = c.get_connection("local") + + assert new_local is local + assert new_default is not default + + +def test_remove_connection_removes_both_conn_and_conf() -> None: + c = connections.Connections[object](elasticsearch_class=DummyElasticsearch) + + c.configure( + default={"hosts": ["https://es.com:9200"]}, + local={"hosts": ["https://localhost:9200"]}, + ) + c.add_connection("local2", object()) + + c.remove_connection("default") + c.get_connection("local2") + c.remove_connection("local2") + + with raises(Exception): + c.get_connection("local2") + c.get_connection("default") + + +def test_create_connection_constructs_client() -> None: + c = connections.Connections[DummyElasticsearch]( + elasticsearch_class=DummyElasticsearch + ) + c.create_connection("testing", hosts=["https://es.com:9200"]) + + con = c.get_connection("testing") + assert con.hosts == ["https://es.com:9200"] + + +def test_create_connection_adds_our_serializer() -> None: + c = connections.Connections[Elasticsearch](elasticsearch_class=Elasticsearch) + c.create_connection("testing", hosts=["https://es.com:9200"]) + + c_serializers = c.get_connection("testing").transport.serializers + assert c_serializers.serializers["application/json"] is serializer.serializer + + +def test_connection_has_correct_user_agent() -> None: + c = connections.Connections[Elasticsearch](elasticsearch_class=Elasticsearch) + + c.create_connection("testing", hosts=["https://es.com:9200"]) + assert ( + c.get_connection("testing") + ._headers["user-agent"] + .startswith("elasticsearch-dsl-py/") + ) + + my_client = Elasticsearch(hosts=["http://localhost:9200"]) + my_client = my_client.options(headers={"user-agent": "my-user-agent/1.0"}) + c.add_connection("default", my_client) + assert c.get_connection()._headers["user-agent"].startswith("elasticsearch-dsl-py/") + + my_client = Elasticsearch(hosts=["http://localhost:9200"]) + assert ( + c.get_connection(my_client) + 
._headers["user-agent"] + .startswith("elasticsearch-dsl-py/") + ) + + not_a_client = object() + assert c.get_connection(not_a_client) == not_a_client # type: ignore[arg-type] diff --git a/test_elasticsearch/test_dsl/test_field.py b/test_elasticsearch/test_dsl/test_field.py new file mode 100644 index 000000000..423936ae3 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_field.py @@ -0,0 +1,234 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import base64 +import ipaddress +from datetime import date, datetime, time +from typing import cast + +import pytest +from dateutil import tz + +from elasticsearch.dsl import InnerDoc, Range, ValidationException, field + + +def test_date_range_deserialization() -> None: + data = {"lt": "2018-01-01T00:30:10"} + + r = field.DateRange().deserialize(data) + + assert isinstance(r, Range) + assert r.lt == datetime(2018, 1, 1, 0, 30, 10) + + +def test_boolean_deserialization() -> None: + bf = field.Boolean() + + assert not bf.deserialize("false") + assert not bf.deserialize(False) + assert not bf.deserialize("") + assert not bf.deserialize(0) + + assert bf.deserialize(True) + assert bf.deserialize("true") + assert bf.deserialize(1) + + +def test_datetime_deserialization() -> None: + f = field.Date() + dt = datetime.now() + assert dt == f._deserialize(dt.isoformat()) + + d = date.today() + assert datetime.combine(d, time()) == f._deserialize(d.isoformat()) + + +def test_date_deserialization() -> None: + f = field.Date(format="yyyy-MM-dd") + d = date.today() + assert d == f._deserialize(d.isoformat()) + + dt = datetime.now() + assert dt.date() == f._deserialize(dt.isoformat()) + + +def test_date_field_can_have_default_tz() -> None: + f = field.Date(default_timezone="UTC") + now = datetime.now() + + now_with_tz = cast(datetime, f._deserialize(now)) + + assert now_with_tz.tzinfo == tz.gettz("UTC") + assert now.isoformat() + "+00:00" == now_with_tz.isoformat() + + now_with_tz = cast(datetime, f._deserialize(now.isoformat())) + + assert now_with_tz.tzinfo == tz.gettz("UTC") + assert now.isoformat() + "+00:00" == now_with_tz.isoformat() + + +def test_custom_field_car_wrap_other_field() -> None: + class MyField(field.CustomField): + @property + def builtin_type(self) -> field.Text: + return field.Text(**self._params) + + assert {"type": "text", "index": "not_analyzed"} == MyField( + index="not_analyzed" + ).to_dict() + + +def test_field_from_dict() -> None: + f = field.construct_field({"type": "text", "index": "not_analyzed"}) + + assert isinstance(f, field.Text) + assert {"type": "text", "index": "not_analyzed"} == f.to_dict() + + +def test_multi_fields_are_accepted_and_parsed() -> None: + f = field.construct_field( + "text", + fields={"raw": {"type": "keyword"}, "eng": field.Text(analyzer="english")}, + ) + + assert isinstance(f, 
field.Text) + assert { + "type": "text", + "fields": { + "raw": {"type": "keyword"}, + "eng": {"type": "text", "analyzer": "english"}, + }, + } == f.to_dict() + + +def test_nested_provides_direct_access_to_its_fields() -> None: + f = field.Nested(properties={"name": {"type": "text", "index": "not_analyzed"}}) + + assert "name" in f + assert f["name"] == field.Text(index="not_analyzed") + + +def test_field_supports_multiple_analyzers() -> None: + f = field.Text(analyzer="snowball", search_analyzer="keyword") + assert { + "analyzer": "snowball", + "search_analyzer": "keyword", + "type": "text", + } == f.to_dict() + + +def test_multifield_supports_multiple_analyzers() -> None: + f = field.Text( + fields={ + "f1": field.Text(search_analyzer="keyword", analyzer="snowball"), + "f2": field.Text(analyzer="keyword"), + } + ) + assert { + "fields": { + "f1": { + "analyzer": "snowball", + "search_analyzer": "keyword", + "type": "text", + }, + "f2": {"analyzer": "keyword", "type": "text"}, + }, + "type": "text", + } == f.to_dict() + + +def test_scaled_float() -> None: + with pytest.raises(TypeError): + field.ScaledFloat() # type: ignore + f = field.ScaledFloat(123) + assert f.to_dict() == {"scaling_factor": 123, "type": "scaled_float"} + + +def test_ipaddress() -> None: + f = field.Ip() + assert f.deserialize("127.0.0.1") == ipaddress.ip_address("127.0.0.1") + assert f.deserialize("::1") == ipaddress.ip_address("::1") + assert f.serialize(f.deserialize("::1")) == "::1" + assert f.deserialize(None) is None + with pytest.raises(ValueError): + assert f.deserialize("not_an_ipaddress") + + +def test_float() -> None: + f = field.Float() + assert f.deserialize("42") == 42.0 + assert f.deserialize(None) is None + with pytest.raises(ValueError): + assert f.deserialize("not_a_float") + + +def test_integer() -> None: + f = field.Integer() + assert f.deserialize("42") == 42 + assert f.deserialize(None) is None + with pytest.raises(ValueError): + assert f.deserialize("not_an_integer") + + +def test_binary() -> None: + f = field.Binary() + assert f.deserialize(base64.b64encode(b"42")) == b"42" + assert f.deserialize(f.serialize(b"42")) == b"42" + assert f.deserialize(None) is None + + +def test_constant_keyword() -> None: + f = field.ConstantKeyword() + assert f.to_dict() == {"type": "constant_keyword"} + + +def test_rank_features() -> None: + f = field.RankFeatures() + assert f.to_dict() == {"type": "rank_features"} + + +def test_object_dynamic_values() -> None: + f = field.Object(dynamic=True) + assert f.to_dict()["dynamic"] is True + f = field.Object(dynamic=False) + assert f.to_dict()["dynamic"] is False + f = field.Object(dynamic="strict") + assert f.to_dict()["dynamic"] == "strict" + + +def test_object_disabled() -> None: + f = field.Object(enabled=False) + assert f.to_dict() == {"type": "object", "enabled": False} + + +def test_object_constructor() -> None: + expected = {"type": "object", "properties": {"inner_int": {"type": "integer"}}} + + class Inner(InnerDoc): + inner_int = field.Integer() + + obj_from_doc = field.Object(doc_class=Inner) + assert obj_from_doc.to_dict() == expected + + obj_from_props = field.Object(properties={"inner_int": field.Integer()}) + assert obj_from_props.to_dict() == expected + + with pytest.raises(ValidationException): + field.Object(doc_class=Inner, properties={"inner_int": field.Integer()}) + + with pytest.raises(ValidationException): + field.Object(doc_class=Inner, dynamic=False) diff --git a/test_elasticsearch/test_dsl/test_integration/__init__.py 
b/test_elasticsearch/test_dsl/test_integration/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/test_integration/_async/__init__.py b/test_elasticsearch/test_dsl/test_integration/_async/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_analysis.py b/test_elasticsearch/test_dsl/test_integration/_async/test_analysis.py new file mode 100644 index 000000000..1feae56cf --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_analysis.py @@ -0,0 +1,54 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
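The async integration tests that follow drive ad-hoc analyzer definitions against a live cluster through async_simulate(). A condensed sketch of the same pattern outside pytest, assuming an AsyncElasticsearch client that can reach a test cluster at the placeholder URL:

import asyncio

from elasticsearch import AsyncElasticsearch
from elasticsearch.dsl import analyzer, token_filter, tokenizer


async def main() -> None:
    client = AsyncElasticsearch("http://localhost:9200")
    a = analyzer(
        "my-analyzer",
        tokenizer=tokenizer("split_words", "simple_pattern_split", pattern=":"),
        filter=["lowercase", token_filter("no-ifs", "stop", stopwords=["if"])],
    )
    # the cluster tokenizes the text with the inline analyzer definition
    tokens = (await a.async_simulate("if:this:works", using=client)).tokens
    print([t.token for t in tokens])  # the tests above expect ['this', 'works']
    await client.close()


if __name__ == "__main__":
    asyncio.run(main())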
+ +import pytest +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import analyzer, token_filter, tokenizer + + +@pytest.mark.asyncio +async def test_simulate_with_just__builtin_tokenizer( + async_client: AsyncElasticsearch, +) -> None: + a = analyzer("my-analyzer", tokenizer="keyword") + tokens = (await a.async_simulate("Hello World!", using=async_client)).tokens + + assert len(tokens) == 1 + assert tokens[0].token == "Hello World!" + + +@pytest.mark.asyncio +async def test_simulate_complex(async_client: AsyncElasticsearch) -> None: + a = analyzer( + "my-analyzer", + tokenizer=tokenizer("split_words", "simple_pattern_split", pattern=":"), + filter=["lowercase", token_filter("no-ifs", "stop", stopwords=["if"])], + ) + + tokens = (await a.async_simulate("if:this:works", using=async_client)).tokens + + assert len(tokens) == 2 + assert ["this", "works"] == [t.token for t in tokens] + + +@pytest.mark.asyncio +async def test_simulate_builtin(async_client: AsyncElasticsearch) -> None: + a = analyzer("my-analyzer", "english") + tokens = (await a.async_simulate("fixes running")).tokens + + assert ["fix", "run"] == [t.token for t in tokens] diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_document.py b/test_elasticsearch/test_dsl/test_integration/_async/test_document.py new file mode 100644 index 000000000..83b683e1e --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_document.py @@ -0,0 +1,852 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# this file creates several documents using bad or no types because +# these are still supported and should be kept functional in spite +# of not having appropriate type hints. For that reason the comment +# below disables many mypy checks that fails as a result of this. 
+# mypy: disable-error-code="assignment, index, arg-type, call-arg, operator, comparison-overlap, attr-defined" + +from datetime import datetime +from ipaddress import ip_address +from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, List, Tuple, Union + +import pytest +from elasticsearch import AsyncElasticsearch, ConflictError, NotFoundError +from elasticsearch.helpers.errors import BulkIndexError +from pytest import raises +from pytz import timezone + +from elasticsearch.dsl import ( + AsyncDocument, + AsyncSearch, + Binary, + Boolean, + Date, + DenseVector, + Double, + InnerDoc, + Ip, + Keyword, + Long, + Mapping, + MetaField, + Nested, + Object, + Q, + RankFeatures, + Text, + analyzer, + mapped_field, +) +from elasticsearch.dsl.utils import AttrList + +snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"]) + + +class User(InnerDoc): + name = Text(fields={"raw": Keyword()}) + + +class Wiki(AsyncDocument): + owner = Object(User) + views = Long() + ranked = RankFeatures() + + class Index: + name = "test-wiki" + + +class Repository(AsyncDocument): + owner = Object(User) + created_at = Date() + description = Text(analyzer=snowball) + tags = Keyword() + + @classmethod + def search(cls) -> AsyncSearch["Repository"]: # type: ignore[override] + return super().search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(AsyncDocument): + committed_date = Date() + authored_date = Date() + description = Text(analyzer=snowball) + + class Index: + name = "flat-git" + + class Meta: + mapping = Mapping() + + +class History(InnerDoc): + timestamp = Date() + diff = Text() + + +class Comment(InnerDoc): + content = Text() + created_at = Date() + author = Object(User) + history = Nested(History) + + class Meta: + dynamic = MetaField(False) + + +class PullRequest(AsyncDocument): + comments = Nested(Comment) + created_at = Date() + + class Index: + name = "test-prs" + + +class SerializationDoc(AsyncDocument): + i = Long() + b = Boolean() + d = Double() + bin = Binary() + ip = Ip() + + class Index: + name = "test-serialization" + + +class Tags(AsyncDocument): + tags = Keyword(multi=True) + + class Index: + name = "tags" + + +@pytest.mark.asyncio +async def test_serialization(async_write_client: AsyncElasticsearch) -> None: + await SerializationDoc.init() + await async_write_client.index( + index="test-serialization", + id=42, + body={ + "i": [1, 2, "3", None], + "b": [True, False, "true", "false", None], + "d": [0.1, "-0.1", None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "ip": ["::1", "127.0.0.1", None], + }, + ) + sd = await SerializationDoc.get(id=42) + assert sd is not None + + assert sd.i == [1, 2, 3, None] + assert sd.b == [True, False, True, False, None] + assert sd.d == [0.1, -0.1, None] + assert sd.bin == [b"Hello World", None] + assert sd.ip == [ip_address("::1"), ip_address("127.0.0.1"), None] + + assert sd.to_dict() == { + "b": [True, False, True, False, None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "d": [0.1, -0.1, None], + "i": [1, 2, 3, None], + "ip": ["::1", "127.0.0.1", None], + } + + +@pytest.mark.asyncio +async def test_nested_inner_hits_are_wrapped_properly(async_pull_request: Any) -> None: + history_query = Q( + "nested", + path="comments.history", + inner_hits={}, + query=Q("match", comments__history__diff="ahoj"), + ) + s = PullRequest.search().query( + "nested", inner_hits={}, path="comments", query=history_query + ) + + response = await s.execute() + pr = response.hits[0] + assert isinstance(pr, PullRequest) + 
assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].history[0], History) + + comment = pr.meta.inner_hits.comments.hits[0] + assert isinstance(comment, Comment) + assert comment.author.name == "honzakral" + assert isinstance(comment.history[0], History) + + history = comment.meta.inner_hits["comments.history"].hits[0] + assert isinstance(history, History) + assert history.timestamp == datetime(2012, 1, 1) + assert "score" in history.meta + + +@pytest.mark.asyncio +async def test_nested_inner_hits_are_deserialized_properly( + async_pull_request: Any, +) -> None: + s = PullRequest.search().query( + "nested", + inner_hits={}, + path="comments", + query=Q("match", comments__content="hello"), + ) + + response = await s.execute() + pr = response.hits[0] + assert isinstance(pr.created_at, datetime) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].created_at, datetime) + + +@pytest.mark.asyncio +async def test_nested_top_hits_are_wrapped_properly(async_pull_request: Any) -> None: + s = PullRequest.search() + s.aggs.bucket("comments", "nested", path="comments").metric( + "hits", "top_hits", size=1 + ) + + r = await s.execute() + + print(r._d_) + assert isinstance(r.aggregations.comments.hits.hits[0], Comment) + + +@pytest.mark.asyncio +async def test_update_object_field(async_write_client: AsyncElasticsearch) -> None: + await Wiki.init() + w = Wiki( + owner=User(name="Honza Kral"), + _id="elasticsearch-py", + ranked={"test1": 0.1, "topic2": 0.2}, + ) + await w.save() + + assert "updated" == await w.update(owner=[{"name": "Honza"}, User(name="Nick")]) + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + w = await Wiki.get(id="elasticsearch-py") + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + assert w.ranked == {"test1": 0.1, "topic2": 0.2} + + +@pytest.mark.asyncio +async def test_update_script(async_write_client: AsyncElasticsearch) -> None: + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + await w.save() + + await w.update(script="ctx._source.views += params.inc", inc=5) + w = await Wiki.get(id="elasticsearch-py") + assert w.views == 47 + + +@pytest.mark.asyncio +async def test_update_script_with_dict(async_write_client: AsyncElasticsearch) -> None: + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + await w.save() + + await w.update( + script={ + "source": "ctx._source.views += params.inc1 + params.inc2", + "params": {"inc1": 2}, + "lang": "painless", + }, + inc2=3, + ) + w = await Wiki.get(id="elasticsearch-py") + assert w.views == 47 + + +@pytest.mark.asyncio +async def test_update_retry_on_conflict(async_write_client: AsyncElasticsearch) -> None: + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + await w.save() + + w1 = await Wiki.get(id="elasticsearch-py") + w2 = await Wiki.get(id="elasticsearch-py") + assert w1 is not None + assert w2 is not None + + await w1.update( + script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1 + ) + await w2.update( + script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1 + ) + + w = await Wiki.get(id="elasticsearch-py") + assert w.views == 52 + + +@pytest.mark.asyncio +@pytest.mark.parametrize("retry_on_conflict", [None, 0]) +async def test_update_conflicting_version( + async_write_client: AsyncElasticsearch, retry_on_conflict: bool +) -> None: + await Wiki.init() + w = 
Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + await w.save() + + w1 = await Wiki.get(id="elasticsearch-py") + w2 = await Wiki.get(id="elasticsearch-py") + assert w1 is not None + assert w2 is not None + + await w1.update(script="ctx._source.views += params.inc", inc=5) + + with raises(ConflictError): + await w2.update( + script="ctx._source.views += params.inc", + inc=5, + retry_on_conflict=retry_on_conflict, + ) + + +@pytest.mark.asyncio +async def test_save_and_update_return_doc_meta( + async_write_client: AsyncElasticsearch, +) -> None: + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + resp = await w.save(return_doc_meta=True) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "created" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_version", + "result", + } + + resp = await w.update( + script="ctx._source.views += params.inc", inc=5, return_doc_meta=True + ) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "updated" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_version", + "result", + } + + +@pytest.mark.asyncio +async def test_init(async_write_client: AsyncElasticsearch) -> None: + await Repository.init(index="test-git") + + assert await async_write_client.indices.exists(index="test-git") + + +@pytest.mark.asyncio +async def test_get_raises_404_on_index_missing( + async_data_client: AsyncElasticsearch, +) -> None: + with raises(NotFoundError): + await Repository.get("elasticsearch-dsl-php", index="not-there") + + +@pytest.mark.asyncio +async def test_get_raises_404_on_non_existent_id( + async_data_client: AsyncElasticsearch, +) -> None: + with raises(NotFoundError): + await Repository.get("elasticsearch-dsl-php") + + +@pytest.mark.asyncio +async def test_get_returns_none_if_404_ignored( + async_data_client: AsyncElasticsearch, +) -> None: + assert None is await Repository.get( + "elasticsearch-dsl-php", using=async_data_client.options(ignore_status=404) + ) + + +@pytest.mark.asyncio +async def test_get_returns_none_if_404_ignored_and_index_doesnt_exist( + async_data_client: AsyncElasticsearch, +) -> None: + assert None is await Repository.get( + "42", index="not-there", using=async_data_client.options(ignore_status=404) + ) + + +@pytest.mark.asyncio +async def test_get(async_data_client: AsyncElasticsearch) -> None: + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + assert datetime(2014, 3, 3) == elasticsearch_repo.created_at + + +@pytest.mark.asyncio +async def test_exists_return_true(async_data_client: AsyncElasticsearch) -> None: + assert await Repository.exists("elasticsearch-dsl-py") + + +@pytest.mark.asyncio +async def test_exists_false(async_data_client: AsyncElasticsearch) -> None: + assert not await Repository.exists("elasticsearch-dsl-php") + + +@pytest.mark.asyncio +async def test_get_with_tz_date(async_data_client: AsyncElasticsearch) -> None: + first_commit = await Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + tzinfo = timezone("Europe/Prague") + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123000)) + == first_commit.authored_date + ) + + +@pytest.mark.asyncio +async def test_save_with_tz_date(async_data_client: AsyncElasticsearch) 
-> None: + tzinfo = timezone("Europe/Prague") + first_commit = await Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + first_commit.committed_date = tzinfo.localize( + datetime(2014, 5, 2, 13, 47, 19, 123456) + ) + await first_commit.save() + + first_commit = await Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123456)) + == first_commit.committed_date + ) + + +COMMIT_DOCS_WITH_MISSING = [ + {"_id": "0"}, # Missing + {"_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"}, # Existing + {"_id": "f"}, # Missing + {"_id": "eb3e543323f189fd7b698e66295427204fff5755"}, # Existing +] + + +@pytest.mark.asyncio +async def test_mget(async_data_client: AsyncElasticsearch) -> None: + commits = await Commit.mget(COMMIT_DOCS_WITH_MISSING) + assert commits[0] is None + assert commits[1] is not None + assert commits[1].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[2] is None + assert commits[3] is not None + assert commits[3].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +@pytest.mark.asyncio +async def test_mget_raises_exception_when_missing_param_is_invalid( + async_data_client: AsyncElasticsearch, +) -> None: + with raises(ValueError): + await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raj") + + +@pytest.mark.asyncio +async def test_mget_raises_404_when_missing_param_is_raise( + async_data_client: AsyncElasticsearch, +) -> None: + with raises(NotFoundError): + await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raise") + + +@pytest.mark.asyncio +async def test_mget_ignores_missing_docs_when_missing_param_is_skip( + async_data_client: AsyncElasticsearch, +) -> None: + commits = await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="skip") + assert commits[0] is not None + assert commits[0].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[1] is not None + assert commits[1].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +@pytest.mark.asyncio +async def test_update_works_from_search_response( + async_data_client: AsyncElasticsearch, +) -> None: + elasticsearch_repo = (await Repository.search().execute())[0] + + await elasticsearch_repo.update(owner={"other_name": "elastic"}) + assert "elastic" == elasticsearch_repo.owner.other_name + + new_version = await Repository.get("elasticsearch-dsl-py") + assert new_version is not None + assert "elastic" == new_version.owner.other_name + assert "elasticsearch" == new_version.owner.name + + +@pytest.mark.asyncio +async def test_update(async_data_client: AsyncElasticsearch) -> None: + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + v = elasticsearch_repo.meta.version + + old_seq_no = elasticsearch_repo.meta.seq_no + await elasticsearch_repo.update( + owner={"new_name": "elastic"}, new_field="testing-update" + ) + + assert "elastic" == elasticsearch_repo.owner.new_name + assert "testing-update" == elasticsearch_repo.new_field + + # assert version has been updated + assert elasticsearch_repo.meta.version == v + 1 + + new_version = await Repository.get("elasticsearch-dsl-py") + assert new_version is not None + assert "testing-update" == new_version.new_field + assert "elastic" == new_version.owner.new_name + assert "elasticsearch" == new_version.owner.name + assert "seq_no" in new_version.meta + assert 
new_version.meta.seq_no != old_seq_no + assert "primary_term" in new_version.meta + + +@pytest.mark.asyncio +async def test_save_updates_existing_doc(async_data_client: AsyncElasticsearch) -> None: + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + + elasticsearch_repo.new_field = "testing-save" + old_seq_no = elasticsearch_repo.meta.seq_no + assert "updated" == await elasticsearch_repo.save() + + new_repo = await async_data_client.get(index="git", id="elasticsearch-dsl-py") + assert "testing-save" == new_repo["_source"]["new_field"] + assert new_repo["_seq_no"] != old_seq_no + assert new_repo["_seq_no"] == elasticsearch_repo.meta.seq_no + + +@pytest.mark.asyncio +async def test_update_empty_field(async_client: AsyncElasticsearch) -> None: + await Tags._index.delete(ignore_unavailable=True) + await Tags.init() + d = Tags(id="123", tags=["a", "b"]) + await d.save(refresh=True) + await d.update(tags=[], refresh=True) + assert d.tags == [] + + r = await Tags.search().execute() + assert r.hits[0].tags == [] + + +@pytest.mark.asyncio +async def test_save_automatically_uses_seq_no_and_primary_term( + async_data_client: AsyncElasticsearch, +) -> None: + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + await elasticsearch_repo.save() + + +@pytest.mark.asyncio +async def test_delete_automatically_uses_seq_no_and_primary_term( + async_data_client: AsyncElasticsearch, +) -> None: + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + await elasticsearch_repo.delete() + + +def assert_doc_equals(expected: Any, actual: Any) -> None: + for f in expected: + assert f in actual + assert actual[f] == expected[f] + + +@pytest.mark.asyncio +async def test_can_save_to_different_index( + async_write_client: AsyncElasticsearch, +) -> None: + test_repo = Repository(description="testing", meta={"id": 42}) + assert await test_repo.save(index="test-document") + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"description": "testing"}, + }, + await async_write_client.get(index="test-document", id=42), + ) + + +@pytest.mark.asyncio +async def test_save_without_skip_empty_will_include_empty_fields( + async_write_client: AsyncElasticsearch, +) -> None: + test_repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42}) + assert await test_repo.save(index="test-document", skip_empty=False) + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"field_1": [], "field_2": None, "field_3": {}}, + }, + await async_write_client.get(index="test-document", id=42), + ) + + +@pytest.mark.asyncio +async def test_delete(async_write_client: AsyncElasticsearch) -> None: + await async_write_client.create( + index="test-document", + id="elasticsearch-dsl-py", + body={ + "organization": "elasticsearch", + "created_at": "2014-03-03", + "owner": {"name": "elasticsearch"}, + }, + ) + + test_repo = Repository(meta={"id": "elasticsearch-dsl-py"}) + test_repo.meta.index = "test-document" + await test_repo.delete() + + assert not await async_write_client.exists( + index="test-document", + id="elasticsearch-dsl-py", + ) + + +@pytest.mark.asyncio +async def test_search(async_data_client: AsyncElasticsearch) -> None: + assert await 
Repository.search().count() == 1 + + +@pytest.mark.asyncio +async def test_search_returns_proper_doc_classes( + async_data_client: AsyncElasticsearch, +) -> None: + result = await Repository.search().execute() + + elasticsearch_repo = result.hits[0] + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + + +@pytest.mark.asyncio +async def test_refresh_mapping(async_data_client: AsyncElasticsearch) -> None: + class Commit(AsyncDocument): + class Index: + name = "git" + + await Commit._index.load_mappings() + + assert "stats" in Commit._index._mapping + assert "committer" in Commit._index._mapping + assert "description" in Commit._index._mapping + assert "committed_date" in Commit._index._mapping + assert isinstance(Commit._index._mapping["committed_date"], Date) + + +@pytest.mark.asyncio +async def test_highlight_in_meta(async_data_client: AsyncElasticsearch) -> None: + commit = ( + await Commit.search() + .query("match", description="inverting") + .highlight("description") + .execute() + )[0] + + assert isinstance(commit, Commit) + assert "description" in commit.meta.highlight + assert isinstance(commit.meta.highlight["description"], AttrList) + assert len(commit.meta.highlight["description"]) > 0 + + +@pytest.mark.asyncio +async def test_bulk(async_data_client: AsyncElasticsearch) -> None: + class Address(InnerDoc): + street: str + active: bool + + class Doc(AsyncDocument): + if TYPE_CHECKING: + _id: int + name: str + age: int + languages: List[str] = mapped_field(Keyword()) + addresses: List[Address] + + class Index: + name = "bulk-index" + + await Doc._index.delete(ignore_unavailable=True) + await Doc.init() + + async def gen1() -> AsyncIterator[Union[Doc, Dict[str, Any]]]: + yield Doc( + name="Joe", + age=33, + languages=["en", "fr"], + addresses=[ + Address(street="123 Main St", active=True), + Address(street="321 Park Dr.", active=False), + ], + ) + yield Doc(name="Susan", age=20, languages=["en"]) + yield {"_op_type": "create", "_id": "45", "_source": Doc(name="Sarah", age=45)} + + await Doc.bulk(gen1(), refresh=True) + docs = list(await Doc.search().execute()) + assert len(docs) == 3 + assert docs[0].to_dict() == { + "name": "Joe", + "age": 33, + "languages": [ + "en", + "fr", + ], + "addresses": [ + { + "active": True, + "street": "123 Main St", + }, + { + "active": False, + "street": "321 Park Dr.", + }, + ], + } + assert docs[1].to_dict() == { + "name": "Susan", + "age": 20, + "languages": ["en"], + } + assert docs[2].to_dict() == { + "name": "Sarah", + "age": 45, + } + assert docs[2].meta.id == "45" + + async def gen2() -> AsyncIterator[Union[Doc, Dict[str, Any]]]: + yield {"_op_type": "create", "_id": "45", "_source": Doc(name="Sarah", age=45)} + + # a "create" action with an existing id should fail + with raises(BulkIndexError): + await Doc.bulk(gen2(), refresh=True) + + async def gen3() -> AsyncIterator[Union[Doc, Dict[str, Any]]]: + yield Doc(_id="45", name="Sarah", age=45, languages=["es"]) + yield {"_op_type": "delete", "_id": docs[1].meta.id} + + await Doc.bulk(gen3(), refresh=True) + with raises(NotFoundError): + await Doc.get(docs[1].meta.id) + doc = await Doc.get("45") + assert doc is not None + assert (doc).to_dict() == { + "name": "Sarah", + "age": 45, + "languages": ["es"], + } + + +@pytest.mark.asyncio +async def test_legacy_dense_vector( + async_client: AsyncElasticsearch, es_version: Tuple[int, ...] 
+) -> None: + if es_version >= (8, 16): + pytest.skip("this test is a legacy version for Elasticsearch 8.15 or older") + + class Doc(AsyncDocument): + float_vector: List[float] = mapped_field(DenseVector(dims=3)) + + class Index: + name = "vectors" + + await Doc._index.delete(ignore_unavailable=True) + await Doc.init() + + doc = Doc(float_vector=[1.0, 1.2, 2.3]) + await doc.save(refresh=True) + + docs = await Doc.search().execute() + assert len(docs) == 1 + assert docs[0].float_vector == doc.float_vector + + +@pytest.mark.asyncio +async def test_dense_vector( + async_client: AsyncElasticsearch, es_version: Tuple[int, ...] +) -> None: + if es_version < (8, 16): + pytest.skip("this test requires Elasticsearch 8.16 or newer") + + class Doc(AsyncDocument): + float_vector: List[float] = mapped_field(DenseVector()) + byte_vector: List[int] = mapped_field(DenseVector(element_type="byte")) + bit_vector: str = mapped_field(DenseVector(element_type="bit")) + + class Index: + name = "vectors" + + await Doc._index.delete(ignore_unavailable=True) + await Doc.init() + + doc = Doc( + float_vector=[1.0, 1.2, 2.3], byte_vector=[12, 23, 34, 45], bit_vector="12abf0" + ) + await doc.save(refresh=True) + + docs = await Doc.search().execute() + assert len(docs) == 1 + assert docs[0].float_vector == doc.float_vector + assert docs[0].byte_vector == doc.byte_vector + assert docs[0].bit_vector == doc.bit_vector diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py b/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py new file mode 100644 index 000000000..5efc7033e --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py @@ -0,0 +1,305 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
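+
+# Integration tests for AsyncFacetedSearch against the "git", "flat-git" and
+# "test-prs" fixture indices: terms, date histogram, range and nested facets,
+# facet filters, custom metric aggregations and pagination. A faceted search
+# is declared as a class, for example (abridged from the CommitSearch fixture
+# below):
+#
+#     class CommitSearch(AsyncFacetedSearch):
+#         index = "flat-git"
+#         fields = ("description", "files")
+#         facets = {"files": TermsFacet(field="files")}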
+ +from datetime import datetime +from typing import Tuple, Type + +import pytest +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import A, AsyncDocument, AsyncSearch, Boolean, Date, Keyword +from elasticsearch.dsl.faceted_search import ( + AsyncFacetedSearch, + DateHistogramFacet, + NestedFacet, + RangeFacet, + TermsFacet, +) + +from .test_document import PullRequest + + +class Repos(AsyncDocument): + is_public = Boolean() + created_at = Date() + + class Index: + name = "git" + + +class Commit(AsyncDocument): + files = Keyword() + committed_date = Date() + + class Index: + name = "git" + + +class MetricSearch(AsyncFacetedSearch): + index = "git" + doc_types = [Commit] + + facets = { + "files": TermsFacet(field="files", metric=A("max", field="committed_date")), + } + + +@pytest.fixture(scope="session") +def commit_search_cls(es_version: Tuple[int, ...]) -> Type[AsyncFacetedSearch]: + if es_version >= (7, 2): + interval_kwargs = {"fixed_interval": "1d"} + else: + interval_kwargs = {"interval": "day"} + + class CommitSearch(AsyncFacetedSearch): + index = "flat-git" + fields = ( + "description", + "files", + ) + + facets = { + "files": TermsFacet(field="files"), + "frequency": DateHistogramFacet( + field="authored_date", min_doc_count=1, **interval_kwargs + ), + "deletions": RangeFacet( + field="stats.deletions", + ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))], + ), + } + + return CommitSearch + + +@pytest.fixture(scope="session") +def repo_search_cls(es_version: Tuple[int, ...]) -> Type[AsyncFacetedSearch]: + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class RepoSearch(AsyncFacetedSearch): + index = "git" + doc_types = [Repos] + facets = { + "public": TermsFacet(field="is_public"), + "created": DateHistogramFacet( + field="created_at", **{interval_type: "month"} + ), + } + + def search(self) -> AsyncSearch: + s = super().search() + return s.filter("term", commit_repo="repo") + + return RepoSearch + + +@pytest.fixture(scope="session") +def pr_search_cls(es_version: Tuple[int, ...]) -> Type[AsyncFacetedSearch]: + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class PRSearch(AsyncFacetedSearch): + index = "test-prs" + doc_types = [PullRequest] + facets = { + "comments": NestedFacet( + "comments", + DateHistogramFacet( + field="comments.created_at", **{interval_type: "month"} + ), + ) + } + + return PRSearch + + +@pytest.mark.asyncio +async def test_facet_with_custom_metric(async_data_client: AsyncElasticsearch) -> None: + ms = MetricSearch() + r = await ms.execute() + + dates = [f[1] for f in r.facets.files] + assert dates == list(sorted(dates, reverse=True)) + assert dates[0] == 1399038439000 + + +@pytest.mark.asyncio +async def test_nested_facet( + async_pull_request: PullRequest, pr_search_cls: Type[AsyncFacetedSearch] +) -> None: + prs = pr_search_cls() + r = await prs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments + + +@pytest.mark.asyncio +async def test_nested_facet_with_filter( + async_pull_request: PullRequest, pr_search_cls: Type[AsyncFacetedSearch] +) -> None: + prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)}) + r = await prs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments + + prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)}) + r = await 
prs.execute() + assert not r.hits + + +@pytest.mark.asyncio +async def test_datehistogram_facet( + async_data_client: AsyncElasticsearch, repo_search_cls: Type[AsyncFacetedSearch] +) -> None: + rs = repo_search_cls() + r = await rs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created + + +@pytest.mark.asyncio +async def test_boolean_facet( + async_data_client: AsyncElasticsearch, repo_search_cls: Type[AsyncFacetedSearch] +) -> None: + rs = repo_search_cls() + r = await rs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(True, 1, False)] == r.facets.public + value, count, selected = r.facets.public[0] + assert value is True + + +@pytest.mark.asyncio +async def test_empty_search_finds_everything( + async_data_client: AsyncElasticsearch, + es_version: Tuple[int, ...], + commit_search_cls: Type[AsyncFacetedSearch], +) -> None: + cs = commit_search_cls() + r = await cs.execute() + + assert r.hits.total.value == 52 # type: ignore[attr-defined] + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, False), + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + ("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 2, False), + (datetime(2014, 3, 4, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 3, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 9, False), + (datetime(2014, 3, 10, 0, 0), 2, False), + (datetime(2014, 3, 15, 0, 0), 4, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 2, False), + (datetime(2014, 3, 24, 0, 0), 10, False), + (datetime(2014, 4, 20, 0, 0), 2, False), + (datetime(2014, 4, 22, 0, 0), 2, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 2, False), + (datetime(2014, 5, 1, 0, 0), 2, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 19, False), + ("good", 14, False), + ("better", 19, False), + ] == r.facets.deletions + + +@pytest.mark.asyncio +async def test_term_filters_are_shown_as_selected_and_data_is_filtered( + async_data_client: AsyncElasticsearch, commit_search_cls: Type[AsyncFacetedSearch] +) -> None: + cs = commit_search_cls(filters={"files": "test_elasticsearch_dsl"}) + + r = await cs.execute() + + assert 35 == r.hits.total.value # type: ignore[attr-defined] + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, True), # selected + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + ("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 2, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 6, False), + (datetime(2014, 3, 10, 0, 0), 1, False), + 
(datetime(2014, 3, 15, 0, 0), 3, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 1, False), + (datetime(2014, 3, 24, 0, 0), 7, False), + (datetime(2014, 4, 20, 0, 0), 1, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 1, False), + (datetime(2014, 5, 1, 0, 0), 1, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 12, False), + ("good", 10, False), + ("better", 13, False), + ] == r.facets.deletions + + +@pytest.mark.asyncio +async def test_range_filters_are_shown_as_selected_and_data_is_filtered( + async_data_client: AsyncElasticsearch, commit_search_cls: Type[AsyncFacetedSearch] +) -> None: + cs = commit_search_cls(filters={"deletions": "better"}) + + r = await cs.execute() + + assert 19 == r.hits.total.value # type: ignore[attr-defined] + + +@pytest.mark.asyncio +async def test_pagination( + async_data_client: AsyncElasticsearch, commit_search_cls: Type[AsyncFacetedSearch] +) -> None: + cs = commit_search_cls() + cs = cs[0:20] + + assert 52 == await cs.count() + assert 20 == len(await cs.execute()) diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_index.py b/test_elasticsearch/test_dsl/test_integration/_async/test_index.py new file mode 100644 index 000000000..10c426e5d --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_index.py @@ -0,0 +1,162 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
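+
+# Integration tests for AsyncIndex, AsyncIndexTemplate and
+# AsyncComposableIndexTemplate: creating indices with mappings, settings and
+# analysis, updating settings on existing indices, existence checks and
+# deletion. The basic pattern exercised below is:
+#
+#     i = AsyncIndex("test-blog", using=async_write_client)
+#     i.document(Post)
+#     i.settings(number_of_replicas=0, number_of_shards=1)
+#     await i.create()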
+ +import pytest +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import ( + AsyncComposableIndexTemplate, + AsyncDocument, + AsyncIndex, + AsyncIndexTemplate, + Date, + Text, + analysis, +) + + +class Post(AsyncDocument): + title = Text(analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword")) + published_from = Date() + + +@pytest.mark.asyncio +async def test_index_template_works(async_write_client: AsyncElasticsearch) -> None: + it = AsyncIndexTemplate("test-template", "test-legacy-*") + it.document(Post) + it.settings(number_of_replicas=0, number_of_shards=1) + await it.save() + + i = AsyncIndex("test-legacy-blog") + await i.create() + + assert { + "test-legacy-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == await async_write_client.indices.get_mapping(index="test-legacy-blog") + + +@pytest.mark.asyncio +async def test_composable_index_template_works( + async_write_client: AsyncElasticsearch, +) -> None: + it = AsyncComposableIndexTemplate("test-template", "test-*") + it.document(Post) + it.settings(number_of_replicas=0, number_of_shards=1) + await it.save() + + i = AsyncIndex("test-blog") + await i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == await async_write_client.indices.get_mapping(index="test-blog") + + +@pytest.mark.asyncio +async def test_index_can_be_saved_even_with_settings( + async_write_client: AsyncElasticsearch, +) -> None: + i = AsyncIndex("test-blog", using=async_write_client) + i.settings(number_of_shards=3, number_of_replicas=0) + await i.save() + i.settings(number_of_replicas=1) + await i.save() + + assert ( + "1" + == (await i.get_settings())["test-blog"]["settings"]["index"][ + "number_of_replicas" + ] + ) + + +@pytest.mark.asyncio +async def test_index_exists(async_data_client: AsyncElasticsearch) -> None: + assert await AsyncIndex("git").exists() + assert not await AsyncIndex("not-there").exists() + + +@pytest.mark.asyncio +async def test_index_can_be_created_with_settings_and_mappings( + async_write_client: AsyncElasticsearch, +) -> None: + i = AsyncIndex("test-blog", using=async_write_client) + i.document(Post) + i.settings(number_of_replicas=0, number_of_shards=1) + await i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == await async_write_client.indices.get_mapping(index="test-blog") + + settings = await async_write_client.indices.get_settings(index="test-blog") + assert settings["test-blog"]["settings"]["index"]["number_of_replicas"] == "0" + assert settings["test-blog"]["settings"]["index"]["number_of_shards"] == "1" + assert settings["test-blog"]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } + + +@pytest.mark.asyncio +async def test_delete(async_write_client: AsyncElasticsearch) -> None: + await async_write_client.indices.create( + index="test-index", + body={"settings": {"number_of_replicas": 0, "number_of_shards": 1}}, + ) + + i = AsyncIndex("test-index", using=async_write_client) + await i.delete() + assert not await async_write_client.indices.exists(index="test-index") + + +@pytest.mark.asyncio +async def test_multiple_indices_with_same_doc_type_work( + async_write_client: 
AsyncElasticsearch, +) -> None: + i1 = AsyncIndex("test-index-1", using=async_write_client) + i2 = AsyncIndex("test-index-2", using=async_write_client) + + for i in (i1, i2): + i.document(Post) + await i.create() + + for j in ("test-index-1", "test-index-2"): + settings = await async_write_client.indices.get_settings(index=j) + assert settings[j]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_mapping.py b/test_elasticsearch/test_dsl/test_integration/_async/test_mapping.py new file mode 100644 index 000000000..3f860ba59 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_mapping.py @@ -0,0 +1,171 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import AsyncElasticsearch +from pytest import raises + +from elasticsearch.dsl import AsyncMapping, analysis, exceptions + + +@pytest.mark.asyncio +async def test_mapping_saved_into_es(async_write_client: AsyncElasticsearch) -> None: + m = AsyncMapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + m.field("tags", "keyword") + await m.save("test-mapping", using=async_write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "tags": {"type": "keyword"}, + } + } + } + } == await async_write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.asyncio +async def test_mapping_saved_into_es_when_index_already_exists_closed( + async_write_client: AsyncElasticsearch, +) -> None: + m = AsyncMapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + await async_write_client.indices.create(index="test-mapping") + + with raises(exceptions.IllegalOperation): + await m.save("test-mapping", using=async_write_client) + + await async_write_client.cluster.health( + index="test-mapping", wait_for_status="yellow" + ) + await async_write_client.indices.close(index="test-mapping") + await m.save("test-mapping", using=async_write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": {"name": {"type": "text", "analyzer": "my_analyzer"}} + } + } + } == await async_write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.asyncio +async def test_mapping_saved_into_es_when_index_already_exists_with_analysis( + async_write_client: AsyncElasticsearch, +) -> None: + m = AsyncMapping() + analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword") + m.field("name", "text", analyzer=analyzer) + + new_analysis = analyzer.get_analysis_definition() + new_analysis["analyzer"]["other_analyzer"] = { + "type": "custom", + 
"tokenizer": "whitespace", + } + await async_write_client.indices.create( + index="test-mapping", body={"settings": {"analysis": new_analysis}} + ) + + m.field("title", "text", analyzer=analyzer) + await m.save("test-mapping", using=async_write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "title": {"type": "text", "analyzer": "my_analyzer"}, + } + } + } + } == await async_write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.asyncio +async def test_mapping_gets_updated_from_es( + async_write_client: AsyncElasticsearch, +) -> None: + await async_write_client.indices.create( + index="test-mapping", + body={ + "settings": {"number_of_shards": 1, "number_of_replicas": 0}, + "mappings": { + "date_detection": False, + "properties": { + "title": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + "created_at": {"type": "date"}, + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + }, + }, + }, + }, + }, + ) + + m = await AsyncMapping.from_es("test-mapping", using=async_write_client) + + assert ["comments", "created_at", "title"] == list( + sorted(m.properties.properties._d_.keys()) # type: ignore[attr-defined] + ) + assert { + "date_detection": False, + "properties": { + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + }, + "created_at": {"type": "date"}, + "title": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + } == m.to_dict() + + # test same with alias + await async_write_client.indices.put_alias(index="test-mapping", name="test-alias") + + m2 = await AsyncMapping.from_es("test-alias", using=async_write_client) + assert m2.to_dict() == m.to_dict() diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_search.py b/test_elasticsearch/test_dsl/test_integration/_async/test_search.py new file mode 100644 index 000000000..627656dfd --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_search.py @@ -0,0 +1,304 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+
+import pytest
+from elasticsearch import ApiError, AsyncElasticsearch
+from pytest import raises
+
+from elasticsearch.dsl import (
+    AsyncDocument,
+    AsyncMultiSearch,
+    AsyncSearch,
+    Date,
+    Keyword,
+    Q,
+    Text,
+)
+from elasticsearch.dsl.response import aggs
+
+from ..test_data import FLAT_DATA
+
+
+class Repository(AsyncDocument):
+    created_at = Date()
+    description = Text(analyzer="snowball")
+    tags = Keyword()
+
+    @classmethod
+    def search(cls) -> AsyncSearch["Repository"]:  # type: ignore[override]
+        return super().search().filter("term", commit_repo="repo")
+
+    class Index:
+        name = "git"
+
+
+class Commit(AsyncDocument):
+    class Index:
+        name = "flat-git"
+
+
+@pytest.mark.asyncio
+async def test_filters_aggregation_buckets_are_accessible(
+    async_data_client: AsyncElasticsearch,
+) -> None:
+    has_tests_query = Q("term", files="test_elasticsearch_dsl")
+    s = Commit.search()[0:0]
+    s.aggs.bucket("top_authors", "terms", field="author.name.raw").bucket(
+        "has_tests", "filters", filters={"yes": has_tests_query, "no": ~has_tests_query}
+    ).metric("lines", "stats", field="stats.lines")
+
+    response = await s.execute()
+
+    assert isinstance(
+        response.aggregations.top_authors.buckets[0].has_tests.buckets.yes, aggs.Bucket
+    )
+    assert (
+        35
+        == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.doc_count
+    )
+    assert (
+        228
+        == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.lines.max
+    )
+
+
+@pytest.mark.asyncio
+async def test_top_hits_are_wrapped_in_response(
+    async_data_client: AsyncElasticsearch,
+) -> None:
+    s = Commit.search()[0:0]
+    s.aggs.bucket("top_authors", "terms", field="author.name.raw").metric(
+        "top_commits", "top_hits", size=5
+    )
+    response = await s.execute()
+
+    top_commits = response.aggregations.top_authors.buckets[0].top_commits
+    assert isinstance(top_commits, aggs.TopHitsData)
+    assert 5 == len(top_commits)
+
+    hits = [h for h in top_commits]
+    assert 5 == len(hits)
+    assert isinstance(hits[0], Commit)
+
+
+@pytest.mark.asyncio
+async def test_inner_hits_are_wrapped_in_response(
+    async_data_client: AsyncElasticsearch,
+) -> None:
+    s = AsyncSearch(index="git")[0:1].query(
+        "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all")
+    )
+    response = await s.execute()
+
+    commit = response.hits[0]
+    assert isinstance(commit.meta.inner_hits.repo, response.__class__)
+    assert repr(commit.meta.inner_hits.repo[0]).startswith(
+        "<Hit(git/elasticsearch-dsl-py): "
+    )
+
+
+@pytest.mark.asyncio
+async def test_inner_hits_are_serialized_to_dict(
+    async_data_client: AsyncElasticsearch,
+) -> None:
+    s = AsyncSearch(index="git")[0:1].query(
+        "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all")
+    )
+    response = await s.execute()
+    d = response.to_dict(recursive=True)
+    assert isinstance(d, dict)
+    assert isinstance(d["hits"]["hits"][0]["inner_hits"]["repo"], dict)
+
+    # iterating over the results changes the format of the internal AttrDict
+    for hit in response:
+        pass
+
+    d = response.to_dict(recursive=True)
+    assert isinstance(d, dict)
+    assert isinstance(d["hits"]["hits"][0]["inner_hits"]["repo"], dict)
+
+
+@pytest.mark.asyncio
+async def test_scan_respects_doc_types(async_data_client: AsyncElasticsearch) -> None:
+    repos = [repo async for repo in Repository.search().scan()]
+
+    assert 1 == len(repos)
+    assert isinstance(repos[0], Repository)
+    assert repos[0].organization == "elasticsearch"
+
+
+@pytest.mark.asyncio
+async def test_scan_iterates_through_all_docs(
+    async_data_client: AsyncElasticsearch,
+) -> None:
+    s = AsyncSearch(index="flat-git")
+
+    commits = [commit async for commit in s.scan()]
+
+    assert 52 == len(commits)
+    assert 
{d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.asyncio +async def test_search_after(async_data_client: AsyncElasticsearch) -> None: + page_size = 7 + s = AsyncSearch(index="flat-git")[:page_size].sort("authored_date") + commits = [] + while True: + r = await s.execute() + commits += r.hits + if len(r.hits) < page_size: + break + s = s.search_after() + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.asyncio +async def test_search_after_no_search(async_data_client: AsyncElasticsearch) -> None: + s = AsyncSearch(index="flat-git") + with raises( + ValueError, match="A search must be executed before using search_after" + ): + s.search_after() + await s.count() + with raises( + ValueError, match="A search must be executed before using search_after" + ): + s.search_after() + + +@pytest.mark.asyncio +async def test_search_after_no_sort(async_data_client: AsyncElasticsearch) -> None: + s = AsyncSearch(index="flat-git") + r = await s.execute() + with raises( + ValueError, match="Cannot use search_after when results are not sorted" + ): + r.search_after() + + +@pytest.mark.asyncio +async def test_search_after_no_results(async_data_client: AsyncElasticsearch) -> None: + s = AsyncSearch(index="flat-git")[:100].sort("authored_date") + r = await s.execute() + assert 52 == len(r.hits) + s = s.search_after() + r = await s.execute() + assert 0 == len(r.hits) + with raises( + ValueError, match="Cannot use search_after when there are no search results" + ): + r.search_after() + + +@pytest.mark.asyncio +async def test_point_in_time(async_data_client: AsyncElasticsearch) -> None: + page_size = 7 + commits = [] + async with AsyncSearch(index="flat-git")[:page_size].point_in_time( + keep_alive="30s" + ) as s: + pit_id = s._extra["pit"]["id"] + while True: + r = await s.execute() + commits += r.hits + if len(r.hits) < page_size: + break + s = s.search_after() + assert pit_id == s._extra["pit"]["id"] + assert "30s" == s._extra["pit"]["keep_alive"] + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.asyncio +async def test_iterate(async_data_client: AsyncElasticsearch) -> None: + s = AsyncSearch(index="flat-git") + + commits = [commit async for commit in s.iterate()] + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.asyncio +async def test_response_is_cached(async_data_client: AsyncElasticsearch) -> None: + s = Repository.search() + repos = [repo async for repo in s] + + assert hasattr(s, "_response") + assert s._response.hits == repos + + +@pytest.mark.asyncio +async def test_multi_search(async_data_client: AsyncElasticsearch) -> None: + s1 = Repository.search() + s2 = AsyncSearch[Repository](index="flat-git") + + ms = AsyncMultiSearch[Repository]() + ms = ms.add(s1).add(s2) + + r1, r2 = await ms.execute() + + assert 1 == len(r1) + assert isinstance(r1[0], Repository) + assert r1._search is s1 + + assert 52 == r2.hits.total.value # type: ignore[attr-defined] + assert r2._search is s2 + + +@pytest.mark.asyncio +async def test_multi_missing(async_data_client: AsyncElasticsearch) -> None: + s1 = Repository.search() + s2 = AsyncSearch[Repository](index="flat-git") + s3 = AsyncSearch[Repository](index="does_not_exist") + + ms = AsyncMultiSearch[Repository]() + ms = ms.add(s1).add(s2).add(s3) + + with raises(ApiError): + await ms.execute() + + r1, r2, r3 = await 
ms.execute(raise_on_error=False) + + assert 1 == len(r1) + assert isinstance(r1[0], Repository) + assert r1._search is s1 + + assert 52 == r2.hits.total.value # type: ignore[attr-defined] + assert r2._search is s2 + + assert r3 is None + + +@pytest.mark.asyncio +async def test_raw_subfield_can_be_used_in_aggs( + async_data_client: AsyncElasticsearch, +) -> None: + s = AsyncSearch(index="git")[0:0] + s.aggs.bucket("authors", "terms", field="author.name.raw", size=1) + + r = await s.execute() + + authors = r.aggregations.authors + assert 1 == len(authors) + assert {"key": "Honza Král", "doc_count": 52} == authors[0] diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_update_by_query.py b/test_elasticsearch/test_dsl/test_integration/_async/test_update_by_query.py new file mode 100644 index 000000000..1fbf9d0e9 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_update_by_query.py @@ -0,0 +1,85 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import AsyncUpdateByQuery +from elasticsearch.dsl.search import Q + + +@pytest.mark.asyncio +async def test_update_by_query_no_script( + async_write_client: AsyncElasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + AsyncUpdateByQuery(using=async_write_client) + .index(index) + .filter(~Q("exists", field="is_public")) + ) + response = await ubq.execute() + + assert response.total == 52 + assert response["took"] > 0 + assert not response.timed_out + assert response.updated == 52 + assert response.deleted == 0 + assert response.took > 0 + assert response.success() + + +@pytest.mark.asyncio +async def test_update_by_query_with_script( + async_write_client: AsyncElasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + AsyncUpdateByQuery(using=async_write_client) + .index(index) + .filter(~Q("exists", field="parent_shas")) + .script(source="ctx._source.is_public = false") + ) + ubq = ubq.params(conflicts="proceed") + + response = await ubq.execute() + assert response.total == 2 + assert response.updated == 2 + assert response.version_conflicts == 0 + + +@pytest.mark.asyncio +async def test_delete_by_query_with_script( + async_write_client: AsyncElasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + AsyncUpdateByQuery(using=async_write_client) + .index(index) + .filter(Q("match", parent_shas="1dd19210b5be92b960f7db6f66ae526288edccc3")) + .script(source='ctx.op = "delete"') + ) + ubq = ubq.params(conflicts="proceed") + + response = await ubq.execute() + + assert response.total == 1 + assert response.deleted == 1 + assert response.success() diff --git 
a/test_elasticsearch/test_dsl/test_integration/_sync/__init__.py b/test_elasticsearch/test_dsl/test_integration/_sync/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_analysis.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_analysis.py new file mode 100644 index 000000000..a12756c62 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_analysis.py @@ -0,0 +1,54 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import analyzer, token_filter, tokenizer + + +@pytest.mark.sync +def test_simulate_with_just__builtin_tokenizer( + client: Elasticsearch, +) -> None: + a = analyzer("my-analyzer", tokenizer="keyword") + tokens = (a.simulate("Hello World!", using=client)).tokens + + assert len(tokens) == 1 + assert tokens[0].token == "Hello World!" 
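+
+# The tests below combine a custom simple_pattern_split tokenizer with built-in
+# and custom token filters and run the resulting analyzers through
+# analyzer.simulate() against a live cluster, mirroring the async analysis
+# tests earlier in this patch.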
+ + +@pytest.mark.sync +def test_simulate_complex(client: Elasticsearch) -> None: + a = analyzer( + "my-analyzer", + tokenizer=tokenizer("split_words", "simple_pattern_split", pattern=":"), + filter=["lowercase", token_filter("no-ifs", "stop", stopwords=["if"])], + ) + + tokens = (a.simulate("if:this:works", using=client)).tokens + + assert len(tokens) == 2 + assert ["this", "works"] == [t.token for t in tokens] + + +@pytest.mark.sync +def test_simulate_builtin(client: Elasticsearch) -> None: + a = analyzer("my-analyzer", "english") + tokens = (a.simulate("fixes running")).tokens + + assert ["fix", "run"] == [t.token for t in tokens] diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py new file mode 100644 index 000000000..08f983b6e --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py @@ -0,0 +1,844 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# this file creates several documents using bad or no types because +# these are still supported and should be kept functional in spite +# of not having appropriate type hints. For that reason the comment +# below disables many mypy checks that fails as a result of this. 
+# mypy: disable-error-code="assignment, index, arg-type, call-arg, operator, comparison-overlap, attr-defined" + +from datetime import datetime +from ipaddress import ip_address +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Tuple, Union + +import pytest +from elasticsearch import ConflictError, Elasticsearch, NotFoundError +from elasticsearch.helpers.errors import BulkIndexError +from pytest import raises +from pytz import timezone + +from elasticsearch.dsl import ( + Binary, + Boolean, + Date, + DenseVector, + Document, + Double, + InnerDoc, + Ip, + Keyword, + Long, + Mapping, + MetaField, + Nested, + Object, + Q, + RankFeatures, + Search, + Text, + analyzer, + mapped_field, +) +from elasticsearch.dsl.utils import AttrList + +snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"]) + + +class User(InnerDoc): + name = Text(fields={"raw": Keyword()}) + + +class Wiki(Document): + owner = Object(User) + views = Long() + ranked = RankFeatures() + + class Index: + name = "test-wiki" + + +class Repository(Document): + owner = Object(User) + created_at = Date() + description = Text(analyzer=snowball) + tags = Keyword() + + @classmethod + def search(cls) -> Search["Repository"]: # type: ignore[override] + return super().search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(Document): + committed_date = Date() + authored_date = Date() + description = Text(analyzer=snowball) + + class Index: + name = "flat-git" + + class Meta: + mapping = Mapping() + + +class History(InnerDoc): + timestamp = Date() + diff = Text() + + +class Comment(InnerDoc): + content = Text() + created_at = Date() + author = Object(User) + history = Nested(History) + + class Meta: + dynamic = MetaField(False) + + +class PullRequest(Document): + comments = Nested(Comment) + created_at = Date() + + class Index: + name = "test-prs" + + +class SerializationDoc(Document): + i = Long() + b = Boolean() + d = Double() + bin = Binary() + ip = Ip() + + class Index: + name = "test-serialization" + + +class Tags(Document): + tags = Keyword(multi=True) + + class Index: + name = "tags" + + +@pytest.mark.sync +def test_serialization(write_client: Elasticsearch) -> None: + SerializationDoc.init() + write_client.index( + index="test-serialization", + id=42, + body={ + "i": [1, 2, "3", None], + "b": [True, False, "true", "false", None], + "d": [0.1, "-0.1", None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "ip": ["::1", "127.0.0.1", None], + }, + ) + sd = SerializationDoc.get(id=42) + assert sd is not None + + assert sd.i == [1, 2, 3, None] + assert sd.b == [True, False, True, False, None] + assert sd.d == [0.1, -0.1, None] + assert sd.bin == [b"Hello World", None] + assert sd.ip == [ip_address("::1"), ip_address("127.0.0.1"), None] + + assert sd.to_dict() == { + "b": [True, False, True, False, None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "d": [0.1, -0.1, None], + "i": [1, 2, 3, None], + "ip": ["::1", "127.0.0.1", None], + } + + +@pytest.mark.sync +def test_nested_inner_hits_are_wrapped_properly(pull_request: Any) -> None: + history_query = Q( + "nested", + path="comments.history", + inner_hits={}, + query=Q("match", comments__history__diff="ahoj"), + ) + s = PullRequest.search().query( + "nested", inner_hits={}, path="comments", query=history_query + ) + + response = s.execute() + pr = response.hits[0] + assert isinstance(pr, PullRequest) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].history[0], History) + + comment = 
pr.meta.inner_hits.comments.hits[0] + assert isinstance(comment, Comment) + assert comment.author.name == "honzakral" + assert isinstance(comment.history[0], History) + + history = comment.meta.inner_hits["comments.history"].hits[0] + assert isinstance(history, History) + assert history.timestamp == datetime(2012, 1, 1) + assert "score" in history.meta + + +@pytest.mark.sync +def test_nested_inner_hits_are_deserialized_properly( + pull_request: Any, +) -> None: + s = PullRequest.search().query( + "nested", + inner_hits={}, + path="comments", + query=Q("match", comments__content="hello"), + ) + + response = s.execute() + pr = response.hits[0] + assert isinstance(pr.created_at, datetime) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].created_at, datetime) + + +@pytest.mark.sync +def test_nested_top_hits_are_wrapped_properly(pull_request: Any) -> None: + s = PullRequest.search() + s.aggs.bucket("comments", "nested", path="comments").metric( + "hits", "top_hits", size=1 + ) + + r = s.execute() + + print(r._d_) + assert isinstance(r.aggregations.comments.hits.hits[0], Comment) + + +@pytest.mark.sync +def test_update_object_field(write_client: Elasticsearch) -> None: + Wiki.init() + w = Wiki( + owner=User(name="Honza Kral"), + _id="elasticsearch-py", + ranked={"test1": 0.1, "topic2": 0.2}, + ) + w.save() + + assert "updated" == w.update(owner=[{"name": "Honza"}, User(name="Nick")]) + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + w = Wiki.get(id="elasticsearch-py") + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + assert w.ranked == {"test1": 0.1, "topic2": 0.2} + + +@pytest.mark.sync +def test_update_script(write_client: Elasticsearch) -> None: + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + w.save() + + w.update(script="ctx._source.views += params.inc", inc=5) + w = Wiki.get(id="elasticsearch-py") + assert w.views == 47 + + +@pytest.mark.sync +def test_update_script_with_dict(write_client: Elasticsearch) -> None: + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + w.save() + + w.update( + script={ + "source": "ctx._source.views += params.inc1 + params.inc2", + "params": {"inc1": 2}, + "lang": "painless", + }, + inc2=3, + ) + w = Wiki.get(id="elasticsearch-py") + assert w.views == 47 + + +@pytest.mark.sync +def test_update_retry_on_conflict(write_client: Elasticsearch) -> None: + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + w.save() + + w1 = Wiki.get(id="elasticsearch-py") + w2 = Wiki.get(id="elasticsearch-py") + assert w1 is not None + assert w2 is not None + + w1.update(script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1) + w2.update(script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1) + + w = Wiki.get(id="elasticsearch-py") + assert w.views == 52 + + +@pytest.mark.sync +@pytest.mark.parametrize("retry_on_conflict", [None, 0]) +def test_update_conflicting_version( + write_client: Elasticsearch, retry_on_conflict: bool +) -> None: + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + w.save() + + w1 = Wiki.get(id="elasticsearch-py") + w2 = Wiki.get(id="elasticsearch-py") + assert w1 is not None + assert w2 is not None + + w1.update(script="ctx._source.views += params.inc", inc=5) + + with raises(ConflictError): + w2.update( + script="ctx._source.views += params.inc", + inc=5, + retry_on_conflict=retry_on_conflict, 
+ ) + + +@pytest.mark.sync +def test_save_and_update_return_doc_meta( + write_client: Elasticsearch, +) -> None: + Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + resp = w.save(return_doc_meta=True) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "created" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_version", + "result", + } + + resp = w.update( + script="ctx._source.views += params.inc", inc=5, return_doc_meta=True + ) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "updated" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_version", + "result", + } + + +@pytest.mark.sync +def test_init(write_client: Elasticsearch) -> None: + Repository.init(index="test-git") + + assert write_client.indices.exists(index="test-git") + + +@pytest.mark.sync +def test_get_raises_404_on_index_missing( + data_client: Elasticsearch, +) -> None: + with raises(NotFoundError): + Repository.get("elasticsearch-dsl-php", index="not-there") + + +@pytest.mark.sync +def test_get_raises_404_on_non_existent_id( + data_client: Elasticsearch, +) -> None: + with raises(NotFoundError): + Repository.get("elasticsearch-dsl-php") + + +@pytest.mark.sync +def test_get_returns_none_if_404_ignored( + data_client: Elasticsearch, +) -> None: + assert None is Repository.get( + "elasticsearch-dsl-php", using=data_client.options(ignore_status=404) + ) + + +@pytest.mark.sync +def test_get_returns_none_if_404_ignored_and_index_doesnt_exist( + data_client: Elasticsearch, +) -> None: + assert None is Repository.get( + "42", index="not-there", using=data_client.options(ignore_status=404) + ) + + +@pytest.mark.sync +def test_get(data_client: Elasticsearch) -> None: + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + assert datetime(2014, 3, 3) == elasticsearch_repo.created_at + + +@pytest.mark.sync +def test_exists_return_true(data_client: Elasticsearch) -> None: + assert Repository.exists("elasticsearch-dsl-py") + + +@pytest.mark.sync +def test_exists_false(data_client: Elasticsearch) -> None: + assert not Repository.exists("elasticsearch-dsl-php") + + +@pytest.mark.sync +def test_get_with_tz_date(data_client: Elasticsearch) -> None: + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + tzinfo = timezone("Europe/Prague") + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123000)) + == first_commit.authored_date + ) + + +@pytest.mark.sync +def test_save_with_tz_date(data_client: Elasticsearch) -> None: + tzinfo = timezone("Europe/Prague") + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + first_commit.committed_date = tzinfo.localize( + datetime(2014, 5, 2, 13, 47, 19, 123456) + ) + first_commit.save() + + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert first_commit is not None + + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123456)) + == first_commit.committed_date + ) + + +COMMIT_DOCS_WITH_MISSING = [ + {"_id": "0"}, # Missing + {"_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"}, # Existing + {"_id": "f"}, # Missing + {"_id": 
"eb3e543323f189fd7b698e66295427204fff5755"}, # Existing +] + + +@pytest.mark.sync +def test_mget(data_client: Elasticsearch) -> None: + commits = Commit.mget(COMMIT_DOCS_WITH_MISSING) + assert commits[0] is None + assert commits[1] is not None + assert commits[1].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[2] is None + assert commits[3] is not None + assert commits[3].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +@pytest.mark.sync +def test_mget_raises_exception_when_missing_param_is_invalid( + data_client: Elasticsearch, +) -> None: + with raises(ValueError): + Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raj") + + +@pytest.mark.sync +def test_mget_raises_404_when_missing_param_is_raise( + data_client: Elasticsearch, +) -> None: + with raises(NotFoundError): + Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raise") + + +@pytest.mark.sync +def test_mget_ignores_missing_docs_when_missing_param_is_skip( + data_client: Elasticsearch, +) -> None: + commits = Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="skip") + assert commits[0] is not None + assert commits[0].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[1] is not None + assert commits[1].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +@pytest.mark.sync +def test_update_works_from_search_response( + data_client: Elasticsearch, +) -> None: + elasticsearch_repo = (Repository.search().execute())[0] + + elasticsearch_repo.update(owner={"other_name": "elastic"}) + assert "elastic" == elasticsearch_repo.owner.other_name + + new_version = Repository.get("elasticsearch-dsl-py") + assert new_version is not None + assert "elastic" == new_version.owner.other_name + assert "elasticsearch" == new_version.owner.name + + +@pytest.mark.sync +def test_update(data_client: Elasticsearch) -> None: + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + v = elasticsearch_repo.meta.version + + old_seq_no = elasticsearch_repo.meta.seq_no + elasticsearch_repo.update(owner={"new_name": "elastic"}, new_field="testing-update") + + assert "elastic" == elasticsearch_repo.owner.new_name + assert "testing-update" == elasticsearch_repo.new_field + + # assert version has been updated + assert elasticsearch_repo.meta.version == v + 1 + + new_version = Repository.get("elasticsearch-dsl-py") + assert new_version is not None + assert "testing-update" == new_version.new_field + assert "elastic" == new_version.owner.new_name + assert "elasticsearch" == new_version.owner.name + assert "seq_no" in new_version.meta + assert new_version.meta.seq_no != old_seq_no + assert "primary_term" in new_version.meta + + +@pytest.mark.sync +def test_save_updates_existing_doc(data_client: Elasticsearch) -> None: + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + + elasticsearch_repo.new_field = "testing-save" + old_seq_no = elasticsearch_repo.meta.seq_no + assert "updated" == elasticsearch_repo.save() + + new_repo = data_client.get(index="git", id="elasticsearch-dsl-py") + assert "testing-save" == new_repo["_source"]["new_field"] + assert new_repo["_seq_no"] != old_seq_no + assert new_repo["_seq_no"] == elasticsearch_repo.meta.seq_no + + +@pytest.mark.sync +def test_update_empty_field(client: Elasticsearch) -> None: + Tags._index.delete(ignore_unavailable=True) + Tags.init() + d = Tags(id="123", tags=["a", "b"]) + d.save(refresh=True) + d.update(tags=[], refresh=True) + assert d.tags == [] + + r = Tags.search().execute() 
+ assert r.hits[0].tags == [] + + +@pytest.mark.sync +def test_save_automatically_uses_seq_no_and_primary_term( + data_client: Elasticsearch, +) -> None: + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + elasticsearch_repo.save() + + +@pytest.mark.sync +def test_delete_automatically_uses_seq_no_and_primary_term( + data_client: Elasticsearch, +) -> None: + elasticsearch_repo = Repository.get("elasticsearch-dsl-py") + assert elasticsearch_repo is not None + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + elasticsearch_repo.delete() + + +def assert_doc_equals(expected: Any, actual: Any) -> None: + for f in expected: + assert f in actual + assert actual[f] == expected[f] + + +@pytest.mark.sync +def test_can_save_to_different_index( + write_client: Elasticsearch, +) -> None: + test_repo = Repository(description="testing", meta={"id": 42}) + assert test_repo.save(index="test-document") + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"description": "testing"}, + }, + write_client.get(index="test-document", id=42), + ) + + +@pytest.mark.sync +def test_save_without_skip_empty_will_include_empty_fields( + write_client: Elasticsearch, +) -> None: + test_repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42}) + assert test_repo.save(index="test-document", skip_empty=False) + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"field_1": [], "field_2": None, "field_3": {}}, + }, + write_client.get(index="test-document", id=42), + ) + + +@pytest.mark.sync +def test_delete(write_client: Elasticsearch) -> None: + write_client.create( + index="test-document", + id="elasticsearch-dsl-py", + body={ + "organization": "elasticsearch", + "created_at": "2014-03-03", + "owner": {"name": "elasticsearch"}, + }, + ) + + test_repo = Repository(meta={"id": "elasticsearch-dsl-py"}) + test_repo.meta.index = "test-document" + test_repo.delete() + + assert not write_client.exists( + index="test-document", + id="elasticsearch-dsl-py", + ) + + +@pytest.mark.sync +def test_search(data_client: Elasticsearch) -> None: + assert Repository.search().count() == 1 + + +@pytest.mark.sync +def test_search_returns_proper_doc_classes( + data_client: Elasticsearch, +) -> None: + result = Repository.search().execute() + + elasticsearch_repo = result.hits[0] + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + + +@pytest.mark.sync +def test_refresh_mapping(data_client: Elasticsearch) -> None: + class Commit(Document): + class Index: + name = "git" + + Commit._index.load_mappings() + + assert "stats" in Commit._index._mapping + assert "committer" in Commit._index._mapping + assert "description" in Commit._index._mapping + assert "committed_date" in Commit._index._mapping + assert isinstance(Commit._index._mapping["committed_date"], Date) + + +@pytest.mark.sync +def test_highlight_in_meta(data_client: Elasticsearch) -> None: + commit = ( + Commit.search() + .query("match", description="inverting") + .highlight("description") + .execute() + )[0] + + assert isinstance(commit, Commit) + assert "description" in commit.meta.highlight + assert isinstance(commit.meta.highlight["description"], AttrList) + assert len(commit.meta.highlight["description"]) > 0 + + +@pytest.mark.sync +def test_bulk(data_client: Elasticsearch) -> 
None: + class Address(InnerDoc): + street: str + active: bool + + class Doc(Document): + if TYPE_CHECKING: + _id: int + name: str + age: int + languages: List[str] = mapped_field(Keyword()) + addresses: List[Address] + + class Index: + name = "bulk-index" + + Doc._index.delete(ignore_unavailable=True) + Doc.init() + + def gen1() -> Iterator[Union[Doc, Dict[str, Any]]]: + yield Doc( + name="Joe", + age=33, + languages=["en", "fr"], + addresses=[ + Address(street="123 Main St", active=True), + Address(street="321 Park Dr.", active=False), + ], + ) + yield Doc(name="Susan", age=20, languages=["en"]) + yield {"_op_type": "create", "_id": "45", "_source": Doc(name="Sarah", age=45)} + + Doc.bulk(gen1(), refresh=True) + docs = list(Doc.search().execute()) + assert len(docs) == 3 + assert docs[0].to_dict() == { + "name": "Joe", + "age": 33, + "languages": [ + "en", + "fr", + ], + "addresses": [ + { + "active": True, + "street": "123 Main St", + }, + { + "active": False, + "street": "321 Park Dr.", + }, + ], + } + assert docs[1].to_dict() == { + "name": "Susan", + "age": 20, + "languages": ["en"], + } + assert docs[2].to_dict() == { + "name": "Sarah", + "age": 45, + } + assert docs[2].meta.id == "45" + + def gen2() -> Iterator[Union[Doc, Dict[str, Any]]]: + yield {"_op_type": "create", "_id": "45", "_source": Doc(name="Sarah", age=45)} + + # a "create" action with an existing id should fail + with raises(BulkIndexError): + Doc.bulk(gen2(), refresh=True) + + def gen3() -> Iterator[Union[Doc, Dict[str, Any]]]: + yield Doc(_id="45", name="Sarah", age=45, languages=["es"]) + yield {"_op_type": "delete", "_id": docs[1].meta.id} + + Doc.bulk(gen3(), refresh=True) + with raises(NotFoundError): + Doc.get(docs[1].meta.id) + doc = Doc.get("45") + assert doc is not None + assert (doc).to_dict() == { + "name": "Sarah", + "age": 45, + "languages": ["es"], + } + + +@pytest.mark.sync +def test_legacy_dense_vector( + client: Elasticsearch, es_version: Tuple[int, ...] 
+) -> None: + if es_version >= (8, 16): + pytest.skip("this test is a legacy version for Elasticsearch 8.15 or older") + + class Doc(Document): + float_vector: List[float] = mapped_field(DenseVector(dims=3)) + + class Index: + name = "vectors" + + Doc._index.delete(ignore_unavailable=True) + Doc.init() + + doc = Doc(float_vector=[1.0, 1.2, 2.3]) + doc.save(refresh=True) + + docs = Doc.search().execute() + assert len(docs) == 1 + assert docs[0].float_vector == doc.float_vector + + +@pytest.mark.sync +def test_dense_vector(client: Elasticsearch, es_version: Tuple[int, ...]) -> None: + if es_version < (8, 16): + pytest.skip("this test requires Elasticsearch 8.16 or newer") + + class Doc(Document): + float_vector: List[float] = mapped_field(DenseVector()) + byte_vector: List[int] = mapped_field(DenseVector(element_type="byte")) + bit_vector: str = mapped_field(DenseVector(element_type="bit")) + + class Index: + name = "vectors" + + Doc._index.delete(ignore_unavailable=True) + Doc.init() + + doc = Doc( + float_vector=[1.0, 1.2, 2.3], byte_vector=[12, 23, 34, 45], bit_vector="12abf0" + ) + doc.save(refresh=True) + + docs = Doc.search().execute() + assert len(docs) == 1 + assert docs[0].float_vector == doc.float_vector + assert docs[0].byte_vector == doc.byte_vector + assert docs[0].bit_vector == doc.bit_vector diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py new file mode 100644 index 000000000..114800644 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py @@ -0,0 +1,305 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from datetime import datetime +from typing import Tuple, Type + +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import A, Boolean, Date, Document, Keyword, Search +from elasticsearch.dsl.faceted_search import ( + DateHistogramFacet, + FacetedSearch, + NestedFacet, + RangeFacet, + TermsFacet, +) + +from .test_document import PullRequest + + +class Repos(Document): + is_public = Boolean() + created_at = Date() + + class Index: + name = "git" + + +class Commit(Document): + files = Keyword() + committed_date = Date() + + class Index: + name = "git" + + +class MetricSearch(FacetedSearch): + index = "git" + doc_types = [Commit] + + facets = { + "files": TermsFacet(field="files", metric=A("max", field="committed_date")), + } + + +@pytest.fixture(scope="session") +def commit_search_cls(es_version: Tuple[int, ...]) -> Type[FacetedSearch]: + if es_version >= (7, 2): + interval_kwargs = {"fixed_interval": "1d"} + else: + interval_kwargs = {"interval": "day"} + + class CommitSearch(FacetedSearch): + index = "flat-git" + fields = ( + "description", + "files", + ) + + facets = { + "files": TermsFacet(field="files"), + "frequency": DateHistogramFacet( + field="authored_date", min_doc_count=1, **interval_kwargs + ), + "deletions": RangeFacet( + field="stats.deletions", + ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))], + ), + } + + return CommitSearch + + +@pytest.fixture(scope="session") +def repo_search_cls(es_version: Tuple[int, ...]) -> Type[FacetedSearch]: + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class RepoSearch(FacetedSearch): + index = "git" + doc_types = [Repos] + facets = { + "public": TermsFacet(field="is_public"), + "created": DateHistogramFacet( + field="created_at", **{interval_type: "month"} + ), + } + + def search(self) -> Search: + s = super().search() + return s.filter("term", commit_repo="repo") + + return RepoSearch + + +@pytest.fixture(scope="session") +def pr_search_cls(es_version: Tuple[int, ...]) -> Type[FacetedSearch]: + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class PRSearch(FacetedSearch): + index = "test-prs" + doc_types = [PullRequest] + facets = { + "comments": NestedFacet( + "comments", + DateHistogramFacet( + field="comments.created_at", **{interval_type: "month"} + ), + ) + } + + return PRSearch + + +@pytest.mark.sync +def test_facet_with_custom_metric(data_client: Elasticsearch) -> None: + ms = MetricSearch() + r = ms.execute() + + dates = [f[1] for f in r.facets.files] + assert dates == list(sorted(dates, reverse=True)) + assert dates[0] == 1399038439000 + + +@pytest.mark.sync +def test_nested_facet( + pull_request: PullRequest, pr_search_cls: Type[FacetedSearch] +) -> None: + prs = pr_search_cls() + r = prs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments + + +@pytest.mark.sync +def test_nested_facet_with_filter( + pull_request: PullRequest, pr_search_cls: Type[FacetedSearch] +) -> None: + prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)}) + r = prs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments + + prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)}) + r = prs.execute() + assert not r.hits + + +@pytest.mark.sync +def test_datehistogram_facet( + data_client: Elasticsearch, repo_search_cls: Type[FacetedSearch] 
+) -> None: + rs = repo_search_cls() + r = rs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created + + +@pytest.mark.sync +def test_boolean_facet( + data_client: Elasticsearch, repo_search_cls: Type[FacetedSearch] +) -> None: + rs = repo_search_cls() + r = rs.execute() + + assert r.hits.total.value == 1 # type: ignore[attr-defined] + assert [(True, 1, False)] == r.facets.public + value, count, selected = r.facets.public[0] + assert value is True + + +@pytest.mark.sync +def test_empty_search_finds_everything( + data_client: Elasticsearch, + es_version: Tuple[int, ...], + commit_search_cls: Type[FacetedSearch], +) -> None: + cs = commit_search_cls() + r = cs.execute() + + assert r.hits.total.value == 52 # type: ignore[attr-defined] + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, False), + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + ("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 2, False), + (datetime(2014, 3, 4, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 3, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 9, False), + (datetime(2014, 3, 10, 0, 0), 2, False), + (datetime(2014, 3, 15, 0, 0), 4, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 2, False), + (datetime(2014, 3, 24, 0, 0), 10, False), + (datetime(2014, 4, 20, 0, 0), 2, False), + (datetime(2014, 4, 22, 0, 0), 2, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 2, False), + (datetime(2014, 5, 1, 0, 0), 2, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 19, False), + ("good", 14, False), + ("better", 19, False), + ] == r.facets.deletions + + +@pytest.mark.sync +def test_term_filters_are_shown_as_selected_and_data_is_filtered( + data_client: Elasticsearch, commit_search_cls: Type[FacetedSearch] +) -> None: + cs = commit_search_cls(filters={"files": "test_elasticsearch_dsl"}) + + r = cs.execute() + + assert 35 == r.hits.total.value # type: ignore[attr-defined] + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, True), # selected + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + ("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 2, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 6, False), + (datetime(2014, 3, 10, 0, 0), 1, False), + (datetime(2014, 3, 15, 0, 0), 3, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 1, False), + (datetime(2014, 3, 24, 0, 0), 7, False), + (datetime(2014, 4, 20, 0, 0), 1, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 
0), 2, False), + (datetime(2014, 4, 27, 0, 0), 1, False), + (datetime(2014, 5, 1, 0, 0), 1, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 12, False), + ("good", 10, False), + ("better", 13, False), + ] == r.facets.deletions + + +@pytest.mark.sync +def test_range_filters_are_shown_as_selected_and_data_is_filtered( + data_client: Elasticsearch, commit_search_cls: Type[FacetedSearch] +) -> None: + cs = commit_search_cls(filters={"deletions": "better"}) + + r = cs.execute() + + assert 19 == r.hits.total.value # type: ignore[attr-defined] + + +@pytest.mark.sync +def test_pagination( + data_client: Elasticsearch, commit_search_cls: Type[FacetedSearch] +) -> None: + cs = commit_search_cls() + cs = cs[0:20] + + assert 52 == cs.count() + assert 20 == len(cs.execute()) diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_index.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_index.py new file mode 100644 index 000000000..7509f0b0f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_index.py @@ -0,0 +1,160 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import ( + ComposableIndexTemplate, + Date, + Document, + Index, + IndexTemplate, + Text, + analysis, +) + + +class Post(Document): + title = Text(analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword")) + published_from = Date() + + +@pytest.mark.sync +def test_index_template_works(write_client: Elasticsearch) -> None: + it = IndexTemplate("test-template", "test-legacy-*") + it.document(Post) + it.settings(number_of_replicas=0, number_of_shards=1) + it.save() + + i = Index("test-legacy-blog") + i.create() + + assert { + "test-legacy-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == write_client.indices.get_mapping(index="test-legacy-blog") + + +@pytest.mark.sync +def test_composable_index_template_works( + write_client: Elasticsearch, +) -> None: + it = ComposableIndexTemplate("test-template", "test-*") + it.document(Post) + it.settings(number_of_replicas=0, number_of_shards=1) + it.save() + + i = Index("test-blog") + i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == write_client.indices.get_mapping(index="test-blog") + + +@pytest.mark.sync +def test_index_can_be_saved_even_with_settings( + write_client: Elasticsearch, +) -> None: + i = Index("test-blog", using=write_client) + i.settings(number_of_shards=3, number_of_replicas=0) + i.save() + i.settings(number_of_replicas=1) + i.save() + + assert ( + "1" + == (i.get_settings())["test-blog"]["settings"]["index"]["number_of_replicas"] + ) + + +@pytest.mark.sync +def test_index_exists(data_client: Elasticsearch) -> None: + assert Index("git").exists() + assert not Index("not-there").exists() + + +@pytest.mark.sync +def test_index_can_be_created_with_settings_and_mappings( + write_client: Elasticsearch, +) -> None: + i = Index("test-blog", using=write_client) + i.document(Post) + i.settings(number_of_replicas=0, number_of_shards=1) + i.create() + + assert { + "test-blog": { + "mappings": { + "properties": { + "title": {"type": "text", "analyzer": "my_analyzer"}, + "published_from": {"type": "date"}, + } + } + } + } == write_client.indices.get_mapping(index="test-blog") + + settings = write_client.indices.get_settings(index="test-blog") + assert settings["test-blog"]["settings"]["index"]["number_of_replicas"] == "0" + assert settings["test-blog"]["settings"]["index"]["number_of_shards"] == "1" + assert settings["test-blog"]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": {"type": "custom", "tokenizer": "keyword"}} + } + + +@pytest.mark.sync +def test_delete(write_client: Elasticsearch) -> None: + write_client.indices.create( + index="test-index", + body={"settings": {"number_of_replicas": 0, "number_of_shards": 1}}, + ) + + i = Index("test-index", using=write_client) + i.delete() + assert not write_client.indices.exists(index="test-index") + + +@pytest.mark.sync +def test_multiple_indices_with_same_doc_type_work( + write_client: Elasticsearch, +) -> None: + i1 = Index("test-index-1", using=write_client) + i2 = Index("test-index-2", using=write_client) + + for i in (i1, i2): + i.document(Post) + i.create() + + for j in ("test-index-1", "test-index-2"): + settings = write_client.indices.get_settings(index=j) + assert settings[j]["settings"]["index"]["analysis"] == { + "analyzer": {"my_analyzer": 
{"type": "custom", "tokenizer": "keyword"}} + } diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_mapping.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_mapping.py new file mode 100644 index 000000000..270e79a5e --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_mapping.py @@ -0,0 +1,169 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch +from pytest import raises + +from elasticsearch.dsl import Mapping, analysis, exceptions + + +@pytest.mark.sync +def test_mapping_saved_into_es(write_client: Elasticsearch) -> None: + m = Mapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + m.field("tags", "keyword") + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "tags": {"type": "keyword"}, + } + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.sync +def test_mapping_saved_into_es_when_index_already_exists_closed( + write_client: Elasticsearch, +) -> None: + m = Mapping() + m.field( + "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword") + ) + write_client.indices.create(index="test-mapping") + + with raises(exceptions.IllegalOperation): + m.save("test-mapping", using=write_client) + + write_client.cluster.health(index="test-mapping", wait_for_status="yellow") + write_client.indices.close(index="test-mapping") + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": {"name": {"type": "text", "analyzer": "my_analyzer"}} + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.sync +def test_mapping_saved_into_es_when_index_already_exists_with_analysis( + write_client: Elasticsearch, +) -> None: + m = Mapping() + analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword") + m.field("name", "text", analyzer=analyzer) + + new_analysis = analyzer.get_analysis_definition() + new_analysis["analyzer"]["other_analyzer"] = { + "type": "custom", + "tokenizer": "whitespace", + } + write_client.indices.create( + index="test-mapping", body={"settings": {"analysis": new_analysis}} + ) + + m.field("title", "text", analyzer=analyzer) + m.save("test-mapping", using=write_client) + + assert { + "test-mapping": { + "mappings": { + "properties": { + "name": {"type": "text", "analyzer": "my_analyzer"}, + "title": {"type": "text", "analyzer": "my_analyzer"}, + } + } + } + } == write_client.indices.get_mapping(index="test-mapping") + + +@pytest.mark.sync +def test_mapping_gets_updated_from_es( + write_client: Elasticsearch, +) -> None: + write_client.indices.create( 
+ index="test-mapping", + body={ + "settings": {"number_of_shards": 1, "number_of_replicas": 0}, + "mappings": { + "date_detection": False, + "properties": { + "title": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + "created_at": {"type": "date"}, + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "type": "text", + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + }, + }, + }, + }, + }, + }, + ) + + m = Mapping.from_es("test-mapping", using=write_client) + + assert ["comments", "created_at", "title"] == list( + sorted(m.properties.properties._d_.keys()) # type: ignore[attr-defined] + ) + assert { + "date_detection": False, + "properties": { + "comments": { + "type": "nested", + "properties": { + "created": {"type": "date"}, + "author": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + }, + "created_at": {"type": "date"}, + "title": { + "analyzer": "snowball", + "fields": {"raw": {"type": "keyword"}}, + "type": "text", + }, + }, + } == m.to_dict() + + # test same with alias + write_client.indices.put_alias(index="test-mapping", name="test-alias") + + m2 = Mapping.from_es("test-alias", using=write_client) + assert m2.to_dict() == m.to_dict() diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_search.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_search.py new file mode 100644 index 000000000..1ce578fa5 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_search.py @@ -0,0 +1,294 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+
+import pytest
+from elasticsearch import ApiError, Elasticsearch
+from pytest import raises
+
+from elasticsearch.dsl import Date, Document, Keyword, MultiSearch, Q, Search, Text
+from elasticsearch.dsl.response import aggs
+
+from ..test_data import FLAT_DATA
+
+
+class Repository(Document):
+    created_at = Date()
+    description = Text(analyzer="snowball")
+    tags = Keyword()
+
+    @classmethod
+    def search(cls) -> Search["Repository"]:  # type: ignore[override]
+        return super().search().filter("term", commit_repo="repo")
+
+    class Index:
+        name = "git"
+
+
+class Commit(Document):
+    class Index:
+        name = "flat-git"
+
+
+@pytest.mark.sync
+def test_filters_aggregation_buckets_are_accessible(
+    data_client: Elasticsearch,
+) -> None:
+    has_tests_query = Q("term", files="test_elasticsearch_dsl")
+    s = Commit.search()[0:0]
+    s.aggs.bucket("top_authors", "terms", field="author.name.raw").bucket(
+        "has_tests", "filters", filters={"yes": has_tests_query, "no": ~has_tests_query}
+    ).metric("lines", "stats", field="stats.lines")
+
+    response = s.execute()
+
+    assert isinstance(
+        response.aggregations.top_authors.buckets[0].has_tests.buckets.yes, aggs.Bucket
+    )
+    assert (
+        35
+        == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.doc_count
+    )
+    assert (
+        228
+        == response.aggregations.top_authors.buckets[0].has_tests.buckets.yes.lines.max
+    )
+
+
+@pytest.mark.sync
+def test_top_hits_are_wrapped_in_response(
+    data_client: Elasticsearch,
+) -> None:
+    s = Commit.search()[0:0]
+    s.aggs.bucket("top_authors", "terms", field="author.name.raw").metric(
+        "top_commits", "top_hits", size=5
+    )
+    response = s.execute()
+
+    top_commits = response.aggregations.top_authors.buckets[0].top_commits
+    assert isinstance(top_commits, aggs.TopHitsData)
+    assert 5 == len(top_commits)
+
+    hits = [h for h in top_commits]
+    assert 5 == len(hits)
+    assert isinstance(hits[0], Commit)
+
+
+@pytest.mark.sync
+def test_inner_hits_are_wrapped_in_response(
+    data_client: Elasticsearch,
+) -> None:
+    s = Search(index="git")[0:1].query(
+        "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all")
+    )
+    response = s.execute()
+
+    commit = response.hits[0]
+    assert isinstance(commit.meta.inner_hits.repo, response.__class__)
+    assert repr(commit.meta.inner_hits.repo[0]).startswith(
+        "<Hit(git/elasticsearch-dsl-py): "
+    )
+
+
+@pytest.mark.sync
+def test_inner_hits_are_serialized_to_dict(
+    data_client: Elasticsearch,
+) -> None:
+    s = Search(index="git")[0:1].query(
+        "has_parent", parent_type="repo", inner_hits={}, query=Q("match_all")
+    )
+    response = s.execute()
+    d = response.to_dict(recursive=True)
+    assert isinstance(d, dict)
+    assert isinstance(d["hits"]["hits"][0]["inner_hits"]["repo"], dict)
+
+    # iterating over the results changes the format of the internal AttrDict
+    for hit in response:
+        pass
+
+    d = response.to_dict(recursive=True)
+    assert isinstance(d, dict)
+    assert isinstance(d["hits"]["hits"][0]["inner_hits"]["repo"], dict)
+
+
+@pytest.mark.sync
+def test_scan_respects_doc_types(data_client: Elasticsearch) -> None:
+    repos = [repo for repo in Repository.search().scan()]
+
+    assert 1 == len(repos)
+    assert isinstance(repos[0], Repository)
+    assert repos[0].organization == "elasticsearch"
+
+
+@pytest.mark.sync
+def test_scan_iterates_through_all_docs(
+    data_client: Elasticsearch,
+) -> None:
+    s = Search(index="flat-git")
+
+    commits = [commit for commit in s.scan()]
+
+    assert 52 == len(commits)
+    assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits}
+
+
+@pytest.mark.sync
+def test_search_after(data_client: Elasticsearch) -> None:
+    page_size = 7
+    s =
Search(index="flat-git")[:page_size].sort("authored_date") + commits = [] + while True: + r = s.execute() + commits += r.hits + if len(r.hits) < page_size: + break + s = s.search_after() + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.sync +def test_search_after_no_search(data_client: Elasticsearch) -> None: + s = Search(index="flat-git") + with raises( + ValueError, match="A search must be executed before using search_after" + ): + s.search_after() + s.count() + with raises( + ValueError, match="A search must be executed before using search_after" + ): + s.search_after() + + +@pytest.mark.sync +def test_search_after_no_sort(data_client: Elasticsearch) -> None: + s = Search(index="flat-git") + r = s.execute() + with raises( + ValueError, match="Cannot use search_after when results are not sorted" + ): + r.search_after() + + +@pytest.mark.sync +def test_search_after_no_results(data_client: Elasticsearch) -> None: + s = Search(index="flat-git")[:100].sort("authored_date") + r = s.execute() + assert 52 == len(r.hits) + s = s.search_after() + r = s.execute() + assert 0 == len(r.hits) + with raises( + ValueError, match="Cannot use search_after when there are no search results" + ): + r.search_after() + + +@pytest.mark.sync +def test_point_in_time(data_client: Elasticsearch) -> None: + page_size = 7 + commits = [] + with Search(index="flat-git")[:page_size].point_in_time(keep_alive="30s") as s: + pit_id = s._extra["pit"]["id"] + while True: + r = s.execute() + commits += r.hits + if len(r.hits) < page_size: + break + s = s.search_after() + assert pit_id == s._extra["pit"]["id"] + assert "30s" == s._extra["pit"]["keep_alive"] + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.sync +def test_iterate(data_client: Elasticsearch) -> None: + s = Search(index="flat-git") + + commits = [commit for commit in s.iterate()] + + assert 52 == len(commits) + assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits} + + +@pytest.mark.sync +def test_response_is_cached(data_client: Elasticsearch) -> None: + s = Repository.search() + repos = [repo for repo in s] + + assert hasattr(s, "_response") + assert s._response.hits == repos + + +@pytest.mark.sync +def test_multi_search(data_client: Elasticsearch) -> None: + s1 = Repository.search() + s2 = Search[Repository](index="flat-git") + + ms = MultiSearch[Repository]() + ms = ms.add(s1).add(s2) + + r1, r2 = ms.execute() + + assert 1 == len(r1) + assert isinstance(r1[0], Repository) + assert r1._search is s1 + + assert 52 == r2.hits.total.value # type: ignore[attr-defined] + assert r2._search is s2 + + +@pytest.mark.sync +def test_multi_missing(data_client: Elasticsearch) -> None: + s1 = Repository.search() + s2 = Search[Repository](index="flat-git") + s3 = Search[Repository](index="does_not_exist") + + ms = MultiSearch[Repository]() + ms = ms.add(s1).add(s2).add(s3) + + with raises(ApiError): + ms.execute() + + r1, r2, r3 = ms.execute(raise_on_error=False) + + assert 1 == len(r1) + assert isinstance(r1[0], Repository) + assert r1._search is s1 + + assert 52 == r2.hits.total.value # type: ignore[attr-defined] + assert r2._search is s2 + + assert r3 is None + + +@pytest.mark.sync +def test_raw_subfield_can_be_used_in_aggs( + data_client: Elasticsearch, +) -> None: + s = Search(index="git")[0:0] + s.aggs.bucket("authors", "terms", field="author.name.raw", size=1) + + r = s.execute() + + authors = r.aggregations.authors + 
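    # size=1 keeps only the single top author bucket in the terms aggregation +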
assert 1 == len(authors) + assert {"key": "Honza Král", "doc_count": 52} == authors[0] diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_update_by_query.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_update_by_query.py new file mode 100644 index 000000000..f16505d49 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_update_by_query.py @@ -0,0 +1,85 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import UpdateByQuery +from elasticsearch.dsl.search import Q + + +@pytest.mark.sync +def test_update_by_query_no_script( + write_client: Elasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + UpdateByQuery(using=write_client) + .index(index) + .filter(~Q("exists", field="is_public")) + ) + response = ubq.execute() + + assert response.total == 52 + assert response["took"] > 0 + assert not response.timed_out + assert response.updated == 52 + assert response.deleted == 0 + assert response.took > 0 + assert response.success() + + +@pytest.mark.sync +def test_update_by_query_with_script( + write_client: Elasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + UpdateByQuery(using=write_client) + .index(index) + .filter(~Q("exists", field="parent_shas")) + .script(source="ctx._source.is_public = false") + ) + ubq = ubq.params(conflicts="proceed") + + response = ubq.execute() + assert response.total == 2 + assert response.updated == 2 + assert response.version_conflicts == 0 + + +@pytest.mark.sync +def test_delete_by_query_with_script( + write_client: Elasticsearch, setup_ubq_tests: str +) -> None: + index = setup_ubq_tests + + ubq = ( + UpdateByQuery(using=write_client) + .index(index) + .filter(Q("match", parent_shas="1dd19210b5be92b960f7db6f66ae526288edccc3")) + .script(source='ctx.op = "delete"') + ) + ubq = ubq.params(conflicts="proceed") + + response = ubq.execute() + + assert response.total == 1 + assert response.deleted == 1 + assert response.success() diff --git a/test_elasticsearch/test_dsl/test_integration/test_count.py b/test_elasticsearch/test_dsl/test_integration/test_count.py new file mode 100644 index 000000000..5d52607bc --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_count.py @@ -0,0 +1,46 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any + +from elasticsearch import Elasticsearch + +from elasticsearch.dsl.search import Q, Search + + +def test_count_all(data_client: Elasticsearch) -> None: + s = Search(using=data_client).index("git") + assert 53 == s.count() + + +def test_count_prefetch(data_client: Elasticsearch, mocker: Any) -> None: + mocker.spy(data_client, "count") + + search = Search(using=data_client).index("git") + search.execute() + assert search.count() == 53 + assert data_client.count.call_count == 0 # type: ignore[attr-defined] + + search._response.hits.total.relation = "gte" # type: ignore[attr-defined] + assert search.count() == 53 + assert data_client.count.call_count == 1 # type: ignore[attr-defined] + + +def test_count_filter(data_client: Elasticsearch) -> None: + s = Search(using=data_client).index("git").filter(~Q("exists", field="parent_shas")) + # initial commit + repo document + assert 2 == s.count() diff --git a/test_elasticsearch/test_dsl/test_integration/test_data.py b/test_elasticsearch/test_dsl/test_integration/test_data.py new file mode 100644 index 000000000..1e80896ab --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_data.py @@ -0,0 +1,1093 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any, Dict + +from elasticsearch import Elasticsearch + +user_mapping = { + "properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}} +} + +FLAT_GIT_INDEX: Dict[str, Any] = { + "settings": { + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], + } + } + }, + }, + "mappings": { + "properties": { + "description": {"type": "text", "analyzer": "snowball"}, + "author": user_mapping, + "authored_date": {"type": "date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, + }, + } + }, +} + +GIT_INDEX: Dict[str, Any] = { + "settings": { + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], + } + } + }, + }, + "mappings": { + "properties": { + # common fields + "description": {"type": "text", "analyzer": "snowball"}, + "commit_repo": {"type": "join", "relations": {"repo": "commit"}}, + # COMMIT mappings + "author": user_mapping, + "authored_date": {"type": "date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, + }, + # REPO mappings + "is_public": {"type": "boolean"}, + "owner": user_mapping, + "created_at": {"type": "date"}, + "tags": {"type": "keyword"}, + } + }, +} + + +def create_flat_git_index(client: Elasticsearch, index: str) -> None: + client.indices.create(index=index, body=FLAT_GIT_INDEX) + + +def create_git_index(client: Elasticsearch, index: str) -> None: + client.indices.create(index=index, body=GIT_INDEX) + + +DATA = [ + # repository + { + "_id": "elasticsearch-dsl-py", + "_source": { + "commit_repo": "repo", + "organization": "elasticsearch", + "created_at": "2014-03-03", + "owner": {"name": "elasticsearch"}, + "is_public": True, + }, + "_index": "git", + }, + # documents + { + "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_aggs.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 7, "insertions": 23, "lines": 30, "files": 4}, + "description": "Make sure buckets aren't modified in-place", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["eb3e543323f189fd7b698e66295427204fff5755"], + "committed_date": "2014-05-02T13:47:19", + "authored_date": "2014-05-02T13:47:19.123+02:00", + }, + "_index": "git", + }, + { + "_id": "eb3e543323f189fd7b698e66295427204fff5755", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 18, "lines": 18, "files": 1}, + "description": "Add communication with ES server", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["dd15b6ba17dd9ba16363a51f85b31f66f1fb1157"], + "committed_date": 
"2014-05-01T13:32:14", + "authored_date": "2014-05-01T13:32:14", + }, + "_index": "git", + }, + { + "_id": "dd15b6ba17dd9ba16363a51f85b31f66f1fb1157", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "test_elasticsearch_dsl/test_result.py", + "elasticsearch_dsl/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 18, "insertions": 44, "lines": 62, "files": 3}, + "description": "Minor cleanup and adding helpers for interactive python", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["ed19caf25abd25300e707fadf3f81b05c5673446"], + "committed_date": "2014-05-01T13:30:44", + "authored_date": "2014-05-01T13:30:44", + }, + "_index": "git", + }, + { + "_id": "ed19caf25abd25300e707fadf3f81b05c5673446", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 28, "lines": 28, "files": 3}, + "description": "Make sure aggs do copy-on-write", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["583e52c71e9a72c1b291ec5843683d8fa8f1ce2d"], + "committed_date": "2014-04-27T16:28:09", + "authored_date": "2014-04-27T16:28:09", + }, + "_index": "git", + }, + { + "_id": "583e52c71e9a72c1b291ec5843683d8fa8f1ce2d", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/aggs.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 1, "lines": 2, "files": 1}, + "description": "Use __setitem__ from DslBase in AggsBase", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1dd19210b5be92b960f7db6f66ae526288edccc3"], + "committed_date": "2014-04-27T15:51:53", + "authored_date": "2014-04-27T15:51:53", + }, + "_index": "git", + }, + { + "_id": "1dd19210b5be92b960f7db6f66ae526288edccc3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/search.py", + "elasticsearch_dsl/filter.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 21, "insertions": 98, "lines": 119, "files": 5}, + "description": "Have Search clone itself on any change besides aggs", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b4c9e29376af2e42a4e6dc153f0f293b1a18bac3"], + "committed_date": "2014-04-26T14:49:43", + "authored_date": "2014-04-26T14:49:43", + }, + "_index": "git", + }, + { + "_id": "b4c9e29376af2e42a4e6dc153f0f293b1a18bac3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_result.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 5, "lines": 5, "files": 1}, + "description": "Add tests for 
[] on response", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a64a54181b232bb5943bd16960be9416e402f5f5"], + "committed_date": "2014-04-26T13:56:52", + "authored_date": "2014-04-26T13:56:52", + }, + "_index": "git", + }, + { + "_id": "a64a54181b232bb5943bd16960be9416e402f5f5", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_result.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 7, "lines": 8, "files": 1}, + "description": "Test access to missing fields raises appropriate exceptions", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["df3f778a3d37b170bde6979a4ef2d9e3e6400778"], + "committed_date": "2014-04-25T16:01:07", + "authored_date": "2014-04-25T16:01:07", + }, + "_index": "git", + }, + { + "_id": "df3f778a3d37b170bde6979a4ef2d9e3e6400778", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "test_elasticsearch_dsl/test_result.py", + "elasticsearch_dsl/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 8, "insertions": 31, "lines": 39, "files": 3}, + "description": "Support attribute access even for inner/nested objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925"], + "committed_date": "2014-04-25T15:59:02", + "authored_date": "2014-04-25T15:59:02", + }, + "_index": "git", + }, + { + "_id": "7e599e116b5ff5d271ce3fe1ebc80e82ab3d5925", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "test_elasticsearch_dsl/test_result.py", + "elasticsearch_dsl/result.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 149, "lines": 149, "files": 2}, + "description": "Added a prototype of a Respose and Result classes", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e2882d28cb8077eaa3e5d8ae76543482d4d90f7e"], + "committed_date": "2014-04-25T15:12:15", + "authored_date": "2014-04-25T15:12:15", + }, + "_index": "git", + }, + { + "_id": "e2882d28cb8077eaa3e5d8ae76543482d4d90f7e", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["docs/index.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 6, "lines": 6, "files": 1}, + "description": "add warning to the docs", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["51f94d83d1c47d3b81207736ca97a1ec6302678f"], + "committed_date": "2014-04-22T19:16:21", + "authored_date": "2014-04-22T19:16:21", + }, + "_index": "git", + }, + { + "_id": "51f94d83d1c47d3b81207736ca97a1ec6302678f", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 3, "insertions": 29, "lines": 32, "files": 1}, + "description": "Add some comments to the 
code", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["0950f6c600b49e2bf012d03b02250fb71c848555"], + "committed_date": "2014-04-22T19:12:06", + "authored_date": "2014-04-22T19:12:06", + }, + "_index": "git", + }, + { + "_id": "0950f6c600b49e2bf012d03b02250fb71c848555", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["README.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 6, "lines": 6, "files": 1}, + "description": "Added a WIP warning", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["54d058f5ac6be8225ef61d5529772aada42ec6c8"], + "committed_date": "2014-04-20T00:19:25", + "authored_date": "2014-04-20T00:19:25", + }, + "_index": "git", + }, + { + "_id": "54d058f5ac6be8225ef61d5529772aada42ec6c8", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/__init__.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 36, "insertions": 7, "lines": 43, "files": 3}, + "description": "Remove the operator kwarg from .query", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["4cb07845e45787abc1f850c0b561e487e0034424"], + "committed_date": "2014-04-20T00:17:25", + "authored_date": "2014-04-20T00:17:25", + }, + "_index": "git", + }, + { + "_id": "4cb07845e45787abc1f850c0b561e487e0034424", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 35, "insertions": 49, "lines": 84, "files": 2}, + "description": "Complex example", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["578abe80f76aafd7e81fe46a44403e601733a938"], + "committed_date": "2014-03-24T20:48:45", + "authored_date": "2014-03-24T20:48:45", + }, + "_index": "git", + }, + { + "_id": "578abe80f76aafd7e81fe46a44403e601733a938", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 0, "lines": 2, "files": 1}, + "description": "removing extra whitespace", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["ecb84f03565940c7d294dbc80723420dcfbab340"], + "committed_date": "2014-03-24T20:42:23", + "authored_date": "2014-03-24T20:42:23", + }, + "_index": "git", + }, + { + "_id": "ecb84f03565940c7d294dbc80723420dcfbab340", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 3, "lines": 4, "files": 1}, + "description": "Make sure attribute access works for .query on Search", + "author": {"name": "Honza Kr\xe1l", "email": 
"honza.kral@gmail.com"}, + "parent_shas": ["9a247c876ab66e2bca56b25f392d054e613b1b2a"], + "committed_date": "2014-03-24T20:35:02", + "authored_date": "2014-03-24T20:34:46", + }, + "_index": "git", + }, + { + "_id": "9a247c876ab66e2bca56b25f392d054e613b1b2a", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 2, "lines": 2, "files": 1}, + "description": "Make sure .index and .doc_type methods are chainable", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["cee5e46947d510a49edd3609ff91aab7b1f3ac89"], + "committed_date": "2014-03-24T20:27:46", + "authored_date": "2014-03-24T20:27:46", + }, + "_index": "git", + }, + { + "_id": "cee5e46947d510a49edd3609ff91aab7b1f3ac89", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/filter.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 13, "insertions": 128, "lines": 141, "files": 3}, + "description": "Added .filter and .post_filter to Search", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1d6857182b09a556d58c6bc5bdcb243092812ba3"], + "committed_date": "2014-03-24T20:26:57", + "authored_date": "2014-03-24T20:26:57", + }, + "_index": "git", + }, + { + "_id": "1d6857182b09a556d58c6bc5bdcb243092812ba3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 24, "insertions": 29, "lines": 53, "files": 2}, + "description": "Extracted combination logic into DslBase", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["4ad92f15a1955846c01642318303a821e8435b75"], + "committed_date": "2014-03-24T20:03:51", + "authored_date": "2014-03-24T20:03:51", + }, + "_index": "git", + }, + { + "_id": "4ad92f15a1955846c01642318303a821e8435b75", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 43, "insertions": 45, "lines": 88, "files": 2}, + "description": "Extracted bool-related logic to a mixin to be reused by filters", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["6eb39dc2825605543ac1ed0b45b9b6baeecc44c2"], + "committed_date": "2014-03-24T19:16:16", + "authored_date": "2014-03-24T19:16:16", + }, + "_index": "git", + }, + { + "_id": "6eb39dc2825605543ac1ed0b45b9b6baeecc44c2", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 32, "lines": 33, "files": 2}, + "description": "Enable 
otheroperators when querying on Search object", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["be094c7b307332cb6039bf9a7c984d2c7593ddff"], + "committed_date": "2014-03-24T18:25:10", + "authored_date": "2014-03-24T18:25:10", + }, + "_index": "git", + }, + { + "_id": "be094c7b307332cb6039bf9a7c984d2c7593ddff", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 23, "insertions": 35, "lines": 58, "files": 3}, + "description": "make sure query operations always return copies", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b2576e3b6437e2cb9d8971fee4ead60df91fd75b"], + "committed_date": "2014-03-24T18:10:37", + "authored_date": "2014-03-24T18:03:13", + }, + "_index": "git", + }, + { + "_id": "b2576e3b6437e2cb9d8971fee4ead60df91fd75b", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 53, "lines": 54, "files": 2}, + "description": "Adding or operator for queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1be002170ac3cd59d2e97824b83b88bb3c9c60ed"], + "committed_date": "2014-03-24T17:53:38", + "authored_date": "2014-03-24T17:53:38", + }, + "_index": "git", + }, + { + "_id": "1be002170ac3cd59d2e97824b83b88bb3c9c60ed", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 35, "lines": 35, "files": 2}, + "description": "Added inverting of queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["24e1e38b2f704f65440d96c290b7c6cd54c2e00e"], + "committed_date": "2014-03-23T17:44:36", + "authored_date": "2014-03-23T17:44:36", + }, + "_index": "git", + }, + { + "_id": "24e1e38b2f704f65440d96c290b7c6cd54c2e00e", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/aggs.py", "elasticsearch_dsl/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 5, "insertions": 1, "lines": 6, "files": 2}, + "description": "Change equality checks to use .to_dict()", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["277cfaedbaf3705ed74ad6296227e1172c97a63f"], + "committed_date": "2014-03-23T17:43:01", + "authored_date": "2014-03-23T17:43:01", + }, + "_index": "git", + }, + { + "_id": "277cfaedbaf3705ed74ad6296227e1172c97a63f", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, 
+ "stats": {"deletions": 1, "insertions": 11, "lines": 12, "files": 2}, + "description": "Test combining of bool queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["6aa3868a6a9f35f71553ce96f9d3d63c74d054fd"], + "committed_date": "2014-03-21T15:15:06", + "authored_date": "2014-03-21T15:15:06", + }, + "_index": "git", + }, + { + "_id": "6aa3868a6a9f35f71553ce96f9d3d63c74d054fd", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 23, "lines": 24, "files": 2}, + "description": "Adding & operator for queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["bb311eb35e7eb53fb5ae01e3f80336866c7e3e37"], + "committed_date": "2014-03-21T15:10:08", + "authored_date": "2014-03-21T15:10:08", + }, + "_index": "git", + }, + { + "_id": "bb311eb35e7eb53fb5ae01e3f80336866c7e3e37", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 1, "insertions": 4, "lines": 5, "files": 2}, + "description": "Don't serialize empty typed fields into dict", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["aea8ea9e421bd53a5b058495e68c3fd57bb1dacc"], + "committed_date": "2014-03-15T16:29:37", + "authored_date": "2014-03-15T16:29:37", + }, + "_index": "git", + }, + { + "_id": "aea8ea9e421bd53a5b058495e68c3fd57bb1dacc", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 3, "insertions": 37, "lines": 40, "files": 3}, + "description": "Bool queries, when combining just adds their params together", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a8819a510b919be43ff3011b904f257798fb8916"], + "committed_date": "2014-03-15T16:16:40", + "authored_date": "2014-03-15T16:16:40", + }, + "_index": "git", + }, + { + "_id": "a8819a510b919be43ff3011b904f257798fb8916", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/run_tests.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 6, "insertions": 2, "lines": 8, "files": 1}, + "description": "Simpler run_tests.py", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e35792a725be2325fc54d3fcb95a7d38d8075a99"], + "committed_date": "2014-03-15T16:02:21", + "authored_date": "2014-03-15T16:02:21", + }, + "_index": "git", + }, + { + "_id": "e35792a725be2325fc54d3fcb95a7d38d8075a99", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/aggs.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza 
Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 2, "lines": 4, "files": 2}, + "description": "Maku we don't treat shortcuts as methods.", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc"], + "committed_date": "2014-03-15T15:59:21", + "authored_date": "2014-03-15T15:59:21", + }, + "_index": "git", + }, + { + "_id": "3179d778dc9e3f2883d5f7ffa63b9ae0399c16bc", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/query.py", + "elasticsearch_dsl/utils.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 9, "insertions": 5, "lines": 14, "files": 3}, + "description": "Centralize == of Dsl objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["b5e7d0c4b284211df8f7b464fcece93a27a802fb"], + "committed_date": "2014-03-10T21:37:24", + "authored_date": "2014-03-10T21:37:24", + }, + "_index": "git", + }, + { + "_id": "b5e7d0c4b284211df8f7b464fcece93a27a802fb", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_aggs.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 75, "insertions": 115, "lines": 190, "files": 6}, + "description": "Experimental draft with more declarative DSL", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["0fe741b43adee5ca1424584ddd3f35fa33f8733c"], + "committed_date": "2014-03-10T21:34:39", + "authored_date": "2014-03-10T21:34:39", + }, + "_index": "git", + }, + { + "_id": "0fe741b43adee5ca1424584ddd3f35fa33f8733c", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["test_elasticsearch_dsl/test_search.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 2, "lines": 4, "files": 1}, + "description": "Make sure .query is chainable", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["a22be5933d4b022cbacee867b1aece120208edf3"], + "committed_date": "2014-03-07T17:41:59", + "authored_date": "2014-03-07T17:41:59", + }, + "_index": "git", + }, + { + "_id": "a22be5933d4b022cbacee867b1aece120208edf3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 14, "insertions": 44, "lines": 58, "files": 3}, + "description": "Search now does aggregations", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e823686aacfc4bdcb34ffdab337a26fa09659a9a"], + "committed_date": "2014-03-07T17:29:55", + "authored_date": "2014-03-07T17:29:55", + }, + "_index": "git", + }, + { + "_id": "e823686aacfc4bdcb34ffdab337a26fa09659a9a", + "routing": 
"elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [".gitignore"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 1, "lines": 1, "files": 1}, + "description": "Ignore html coverage report", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["e0aedb3011c71d704deec03a8f32b2b360d6e364"], + "committed_date": "2014-03-07T17:03:23", + "authored_date": "2014-03-07T17:03:23", + }, + "_index": "git", + }, + { + "_id": "e0aedb3011c71d704deec03a8f32b2b360d6e364", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/aggs.py", + "test_elasticsearch_dsl/test_aggs.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 228, "lines": 228, "files": 2}, + "description": "Added aggregation DSL objects", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd"], + "committed_date": "2014-03-07T16:25:55", + "authored_date": "2014-03-07T16:25:55", + }, + "_index": "git", + }, + { + "_id": "61cbc0aa62a0b776ae5e333406659dbb2f5cfbbd", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py", "elasticsearch_dsl/query.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 12, "insertions": 7, "lines": 19, "files": 2}, + "description": "Only retrieve DslClass, leave the instantiation to the caller", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["647f1017a7b17a913e07af70a3b03202f6adbdfd"], + "committed_date": "2014-03-07T15:27:43", + "authored_date": "2014-03-07T15:27:43", + }, + "_index": "git", + }, + { + "_id": "647f1017a7b17a913e07af70a3b03202f6adbdfd", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "test_elasticsearch_dsl/test_search.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 19, "insertions": 19, "lines": 38, "files": 3}, + "description": "No need to replicate Query suffix when in query namespace", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d"], + "committed_date": "2014-03-07T15:19:01", + "authored_date": "2014-03-07T15:19:01", + }, + "_index": "git", + }, + { + "_id": "7c4f94ecdb38f0e91c7ee52f579c0ea148afcc7d", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["elasticsearch_dsl/utils.py"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 2, "insertions": 3, "lines": 5, "files": 1}, + "description": "Ask forgiveness, not permission", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["c10793c2ca43688195e415b25b674ff34d58eaff"], + "committed_date": "2014-03-07T15:13:22", + "authored_date": "2014-03-07T15:13:22", + }, + "_index": "git", + }, + { + "_id": "c10793c2ca43688195e415b25b674ff34d58eaff", + 
"routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/utils.py", + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 24, "insertions": 27, "lines": 51, "files": 3}, + "description": "Extract DSL object registration to DslMeta", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["d8867fdb17fcf4c696657740fa08d29c36adc6ec"], + "committed_date": "2014-03-07T15:12:13", + "authored_date": "2014-03-07T15:10:31", + }, + "_index": "git", + }, + { + "_id": "d8867fdb17fcf4c696657740fa08d29c36adc6ec", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 13, "lines": 13, "files": 2}, + "description": "Search.to_dict", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["2eb7cd980d917ed6f4a4dd8e246804f710ec5082"], + "committed_date": "2014-03-07T02:58:33", + "authored_date": "2014-03-07T02:58:33", + }, + "_index": "git", + }, + { + "_id": "2eb7cd980d917ed6f4a4dd8e246804f710ec5082", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/search.py", + "test_elasticsearch_dsl/test_search.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 113, "lines": 113, "files": 2}, + "description": "Basic Search object", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["11708576f9118e0dbf27ae1f8a7b799cf281b511"], + "committed_date": "2014-03-06T21:02:03", + "authored_date": "2014-03-06T21:01:05", + }, + "_index": "git", + }, + { + "_id": "11708576f9118e0dbf27ae1f8a7b799cf281b511", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 13, "lines": 13, "files": 2}, + "description": "MatchAll query + anything is anything", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["1dc496e5c7c1b2caf290df477fca2db61ebe37e0"], + "committed_date": "2014-03-06T20:40:39", + "authored_date": "2014-03-06T20:39:52", + }, + "_index": "git", + }, + { + "_id": "1dc496e5c7c1b2caf290df477fca2db61ebe37e0", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 53, "lines": 53, "files": 2}, + "description": "From_dict, Q(dict) and bool query parses it's subqueries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["d407f99d1959b7b862a541c066d9fd737ce913f3"], + "committed_date": "2014-03-06T20:24:30", + "authored_date": 
"2014-03-06T20:24:30", + }, + "_index": "git", + }, + { + "_id": "d407f99d1959b7b862a541c066d9fd737ce913f3", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": ["CONTRIBUTING.md", "README.rst"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 6, "insertions": 21, "lines": 27, "files": 2}, + "description": "Housekeeping - licence and updated generic CONTRIBUTING.md", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["277e8ecc7395754d1ba1f2411ec32337a3e9d73f"], + "committed_date": "2014-03-05T16:21:44", + "authored_date": "2014-03-05T16:21:44", + }, + "_index": "git", + }, + { + "_id": "277e8ecc7395754d1ba1f2411ec32337a3e9d73f", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "setup.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 59, "lines": 59, "files": 3}, + "description": "Automatic query registration and Q function", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["8f1e34bd8f462fec50bcc10971df2d57e2986604"], + "committed_date": "2014-03-05T16:18:52", + "authored_date": "2014-03-05T16:18:52", + }, + "_index": "git", + }, + { + "_id": "8f1e34bd8f462fec50bcc10971df2d57e2986604", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/query.py", + "test_elasticsearch_dsl/test_query.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 54, "lines": 54, "files": 2}, + "description": "Initial implementation of match and bool queries", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["fcff47ddcc6d08be5739d03dd30f504fb9db2608"], + "committed_date": "2014-03-05T15:55:06", + "authored_date": "2014-03-05T15:55:06", + }, + "_index": "git", + }, + { + "_id": "fcff47ddcc6d08be5739d03dd30f504fb9db2608", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "docs/Makefile", + "CONTRIBUTING.md", + "docs/conf.py", + "LICENSE", + "Changelog.rst", + "docs/index.rst", + "docs/Changelog.rst", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 692, "lines": 692, "files": 7}, + "description": "Docs template", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["febe8127ae48fcc81778c0fb2d628f1bcc0a0350"], + "committed_date": "2014-03-04T01:42:31", + "authored_date": "2014-03-04T01:42:31", + }, + "_index": "git", + }, + { + "_id": "febe8127ae48fcc81778c0fb2d628f1bcc0a0350", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [ + "elasticsearch_dsl/__init__.py", + "test_elasticsearch_dsl/run_tests.py", + "setup.py", + "README.rst", + "test_elasticsearch_dsl/__init__.py", + ], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 82, "lines": 82, "files": 5}, + "description": "Empty project structure", + "author": 
{"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": ["2a8f1ce89760bfc72808f3945b539eae650acac9"], + "committed_date": "2014-03-04T01:37:49", + "authored_date": "2014-03-03T18:23:55", + }, + "_index": "git", + }, + { + "_id": "2a8f1ce89760bfc72808f3945b539eae650acac9", + "routing": "elasticsearch-dsl-py", + "_source": { + "commit_repo": {"name": "commit", "parent": "elasticsearch-dsl-py"}, + "files": [".gitignore"], + "committer": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "stats": {"deletions": 0, "insertions": 9, "lines": 9, "files": 1}, + "description": "Initial commit, .gitignore", + "author": {"name": "Honza Kr\xe1l", "email": "honza.kral@gmail.com"}, + "parent_shas": [], + "committed_date": "2014-03-03T18:15:05", + "authored_date": "2014-03-03T18:15:05", + }, + "_index": "git", + }, +] + + +def flatten_doc(d: Dict[str, Any]) -> Dict[str, Any]: + src = d["_source"].copy() + del src["commit_repo"] + return {"_index": "flat-git", "_id": d["_id"], "_source": src} + + +FLAT_DATA = [flatten_doc(d) for d in DATA if "routing" in d] + + +def create_test_git_data(d: Dict[str, Any]) -> Dict[str, Any]: + src = d["_source"].copy() + return { + "_index": "test-git", + "routing": "elasticsearch-dsl-py", + "_id": d["_id"], + "_source": src, + } + + +TEST_GIT_DATA = [create_test_git_data(d) for d in DATA] diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/__init__.py b/test_elasticsearch/test_dsl/test_integration/test_examples/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/__init__.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_alias_migration.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_alias_migration.py new file mode 100644 index 000000000..dae4c973f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_alias_migration.py @@ -0,0 +1,73 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import AsyncElasticsearch + +from ..async_examples import alias_migration +from ..async_examples.alias_migration import ALIAS, PATTERN, BlogPost, migrate + + +@pytest.mark.asyncio +async def test_alias_migration(async_write_client: AsyncElasticsearch) -> None: + # create the index + await alias_migration.setup() + + # verify that template, index, and alias has been set up + assert await async_write_client.indices.exists_index_template(name=ALIAS) + assert await async_write_client.indices.exists(index=PATTERN) + assert await async_write_client.indices.exists_alias(name=ALIAS) + + indices = await async_write_client.indices.get(index=PATTERN) + assert len(indices) == 1 + index_name, _ = indices.popitem() + + # which means we can now save a document + with open(__file__) as f: + bp = BlogPost( + _id=0, + title="Hello World!", + tags=["testing", "dummy"], + content=f.read(), + published=None, + ) + await bp.save(refresh=True) + + assert await BlogPost.search().count() == 1 + + # _matches work which means we get BlogPost instance + bp = (await BlogPost.search().execute())[0] + assert isinstance(bp, BlogPost) + assert not bp.is_published() + assert "0" == bp.meta.id + + # create new index + await migrate() + + indices = await async_write_client.indices.get(index=PATTERN) + assert 2 == len(indices) + alias = await async_write_client.indices.get(index=ALIAS) + assert 1 == len(alias) + assert index_name not in alias + + # data has been moved properly + assert await BlogPost.search().count() == 1 + + # _matches work which means we get BlogPost instance + bp = (await BlogPost.search().execute())[0] + assert isinstance(bp, BlogPost) + assert "0" == bp.meta.id diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_completion.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_completion.py new file mode 100644 index 000000000..e9716c1d2 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_completion.py @@ -0,0 +1,39 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. 
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import AsyncElasticsearch + +from ..async_examples.completion import Person + + +@pytest.mark.asyncio +async def test_person_suggests_on_all_variants_of_name( + async_write_client: AsyncElasticsearch, +) -> None: + await Person.init(using=async_write_client) + + await Person(_id=None, name="Honza Král", popularity=42).save(refresh=True) + + s = Person.search().suggest("t", "kra", completion={"field": "suggest"}) + response = await s.execute() + + opts = response.suggest["t"][0].options + + assert 1 == len(opts) + assert opts[0]._score == 42 + assert opts[0]._source.name == "Honza Král" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_composite_aggs.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_composite_aggs.py new file mode 100644 index 000000000..4bb4e68a3 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_composite_aggs.py @@ -0,0 +1,57 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import A, AsyncSearch + +from ..async_examples.composite_agg import scan_aggs + + +@pytest.mark.asyncio +async def test_scan_aggs_exhausts_all_files( + async_data_client: AsyncElasticsearch, +) -> None: + s = AsyncSearch(index="flat-git") + key_aggs = [{"files": A("terms", field="files")}] + file_list = [f async for f in scan_aggs(s, key_aggs)] + + assert len(file_list) == 26 + + +@pytest.mark.asyncio +async def test_scan_aggs_with_multiple_aggs( + async_data_client: AsyncElasticsearch, +) -> None: + s = AsyncSearch(index="flat-git") + key_aggs = [ + {"files": A("terms", field="files")}, + { + "months": A( + "date_histogram", field="committed_date", calendar_interval="month" + ) + }, + ] + file_list = [ + f + async for f in scan_aggs( + s, key_aggs, {"first_seen": A("min", field="committed_date")} + ) + ] + + assert len(file_list) == 47 diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_parent_child.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_parent_child.py new file mode 100644 index 000000000..4d8527081 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_parent_child.py @@ -0,0 +1,116 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +import pytest +import pytest_asyncio +from elasticsearch import AsyncElasticsearch + +from elasticsearch.dsl import Q + +from ..async_examples.parent_child import Answer, Comment, Question, User, setup + +honza = User( + id=42, + signed_up=datetime(2013, 4, 3), + username="honzakral", + email="honza@elastic.co", + location="Prague", +) + +nick = User( + id=47, + signed_up=datetime(2017, 4, 3), + username="fxdgear", + email="nick.lang@elastic.co", + location="Colorado", +) + + +@pytest_asyncio.fixture +async def question(async_write_client: AsyncElasticsearch) -> Question: + await setup() + assert await async_write_client.indices.exists_index_template(name="base") + + # create a question object + q = Question( + _id=1, + author=nick, + tags=["elasticsearch", "python"], + title="How do I use elasticsearch from Python?", + body=""" + I want to use elasticsearch, how do I do it from Python? 
+ """, + created=None, + question_answer=None, + comments=[], + ) + await q.save() + return q + + +@pytest.mark.asyncio +async def test_comment( + async_write_client: AsyncElasticsearch, question: Question +) -> None: + await question.add_comment(nick, "Just use elasticsearch-py") + + q = await Question.get(1) # type: ignore[arg-type] + assert isinstance(q, Question) + assert 1 == len(q.comments) + + c = q.comments[0] + assert isinstance(c, Comment) + assert c.author.username == "fxdgear" + + +@pytest.mark.asyncio +async def test_question_answer( + async_write_client: AsyncElasticsearch, question: Question +) -> None: + a = await question.add_answer(honza, "Just use `elasticsearch-py`!") + + assert isinstance(a, Answer) + + # refresh the index so we can search right away + await Question._index.refresh() + + # we can now fetch answers from elasticsearch + answers = await question.get_answers() + assert 1 == len(answers) + assert isinstance(answers[0], Answer) + + search = Question.search().query( + "has_child", + type="answer", + inner_hits={}, + query=Q("term", author__username__keyword="honzakral"), + ) + response = await search.execute() + + assert 1 == len(response.hits) + + q = response.hits[0] + assert isinstance(q, Question) + assert 1 == len(q.meta.inner_hits.answer.hits) + assert q.meta.inner_hits.answer.hits is await q.get_answers() + + a = q.meta.inner_hits.answer.hits[0] + assert isinstance(a, Answer) + assert isinstance(await a.get_question(), Question) + assert (await a.get_question()).meta.id == "1" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_percolate.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_percolate.py new file mode 100644 index 000000000..d1564d94b --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_percolate.py @@ -0,0 +1,37 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest +from elasticsearch import AsyncElasticsearch + +from ..async_examples.percolate import BlogPost, setup + + +@pytest.mark.asyncio +async def test_post_gets_tagged_automatically( + async_write_client: AsyncElasticsearch, +) -> None: + await setup() + + bp = BlogPost(_id=47, content="nothing about snakes here!") + bp_py = BlogPost(_id=42, content="something about Python here!") + + await bp.save() + await bp_py.save() + + assert [] == bp.tags + assert {"programming", "development", "python"} == set(bp_py.tags) diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py new file mode 100644 index 000000000..7d3acdd34 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py @@ -0,0 +1,56 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from hashlib import md5 +from typing import Any, List, Tuple +from unittest import SkipTest + +import pytest +from elasticsearch import AsyncElasticsearch + +from test_elasticsearch.test_dsl.async_sleep import sleep + +from ..async_examples import vectors + + +@pytest.mark.asyncio +async def test_vector_search( + async_write_client: AsyncElasticsearch, es_version: Tuple[int, ...], mocker: Any +) -> None: + # this test only runs on Elasticsearch >= 8.11 because the example uses + # a dense vector without specifying an explicit size + if es_version < (8, 11): + raise SkipTest("This test requires Elasticsearch 8.11 or newer") + + class MockModel: + def __init__(self, model: Any): + pass + + def encode(self, text: str) -> List[float]: + vector = [int(ch) for ch in md5(text.encode()).digest()] + total = sum(vector) + return [float(v) / total for v in vector] + + mocker.patch.object(vectors, "SentenceTransformer", new=MockModel) + + await vectors.create() + for i in range(10): + results = await (await vectors.search("Welcome to our team!")).execute() + if len(results.hits) > 0: + break + await sleep(0.1) + assert results[0].name == "New Employee Onboarding Guide" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/__init__.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_alias_migration.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_alias_migration.py new file mode 100644 index 000000000..9a74b699b --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_alias_migration.py @@ -0,0 +1,73 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch + +from ..examples import alias_migration +from ..examples.alias_migration import ALIAS, PATTERN, BlogPost, migrate + + +@pytest.mark.sync +def test_alias_migration(write_client: Elasticsearch) -> None: + # create the index + alias_migration.setup() + + # verify that template, index, and alias has been set up + assert write_client.indices.exists_index_template(name=ALIAS) + assert write_client.indices.exists(index=PATTERN) + assert write_client.indices.exists_alias(name=ALIAS) + + indices = write_client.indices.get(index=PATTERN) + assert len(indices) == 1 + index_name, _ = indices.popitem() + + # which means we can now save a document + with open(__file__) as f: + bp = BlogPost( + _id=0, + title="Hello World!", + tags=["testing", "dummy"], + content=f.read(), + published=None, + ) + bp.save(refresh=True) + + assert BlogPost.search().count() == 1 + + # _matches work which means we get BlogPost instance + bp = (BlogPost.search().execute())[0] + assert isinstance(bp, BlogPost) + assert not bp.is_published() + assert "0" == bp.meta.id + + # create new index + migrate() + + indices = write_client.indices.get(index=PATTERN) + assert 2 == len(indices) + alias = write_client.indices.get(index=ALIAS) + assert 1 == len(alias) + assert index_name not in alias + + # data has been moved properly + assert BlogPost.search().count() == 1 + + # _matches work which means we get BlogPost instance + bp = (BlogPost.search().execute())[0] + assert isinstance(bp, BlogPost) + assert "0" == bp.meta.id diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_completion.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_completion.py new file mode 100644 index 000000000..6dec13e20 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_completion.py @@ -0,0 +1,39 @@ +# Licensed to Elasticsearch B.V. 
under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch + +from ..examples.completion import Person + + +@pytest.mark.sync +def test_person_suggests_on_all_variants_of_name( + write_client: Elasticsearch, +) -> None: + Person.init(using=write_client) + + Person(_id=None, name="Honza Král", popularity=42).save(refresh=True) + + s = Person.search().suggest("t", "kra", completion={"field": "suggest"}) + response = s.execute() + + opts = response.suggest["t"][0].options + + assert 1 == len(opts) + assert opts[0]._score == 42 + assert opts[0]._source.name == "Honza Král" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_composite_aggs.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_composite_aggs.py new file mode 100644 index 000000000..f7d519f92 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_composite_aggs.py @@ -0,0 +1,57 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import A, Search + +from ..examples.composite_agg import scan_aggs + + +@pytest.mark.sync +def test_scan_aggs_exhausts_all_files( + data_client: Elasticsearch, +) -> None: + s = Search(index="flat-git") + key_aggs = [{"files": A("terms", field="files")}] + file_list = [f for f in scan_aggs(s, key_aggs)] + + assert len(file_list) == 26 + + +@pytest.mark.sync +def test_scan_aggs_with_multiple_aggs( + data_client: Elasticsearch, +) -> None: + s = Search(index="flat-git") + key_aggs = [ + {"files": A("terms", field="files")}, + { + "months": A( + "date_histogram", field="committed_date", calendar_interval="month" + ) + }, + ] + file_list = [ + f + for f in scan_aggs( + s, key_aggs, {"first_seen": A("min", field="committed_date")} + ) + ] + + assert len(file_list) == 47 diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_parent_child.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_parent_child.py new file mode 100644 index 000000000..514f03686 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_parent_child.py @@ -0,0 +1,111 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime + +import pytest +from elasticsearch import Elasticsearch + +from elasticsearch.dsl import Q + +from ..examples.parent_child import Answer, Comment, Question, User, setup + +honza = User( + id=42, + signed_up=datetime(2013, 4, 3), + username="honzakral", + email="honza@elastic.co", + location="Prague", +) + +nick = User( + id=47, + signed_up=datetime(2017, 4, 3), + username="fxdgear", + email="nick.lang@elastic.co", + location="Colorado", +) + + +@pytest.fixture +def question(write_client: Elasticsearch) -> Question: + setup() + assert write_client.indices.exists_index_template(name="base") + + # create a question object + q = Question( + _id=1, + author=nick, + tags=["elasticsearch", "python"], + title="How do I use elasticsearch from Python?", + body=""" + I want to use elasticsearch, how do I do it from Python? 
+ """, + created=None, + question_answer=None, + comments=[], + ) + q.save() + return q + + +@pytest.mark.sync +def test_comment(write_client: Elasticsearch, question: Question) -> None: + question.add_comment(nick, "Just use elasticsearch-py") + + q = Question.get(1) # type: ignore[arg-type] + assert isinstance(q, Question) + assert 1 == len(q.comments) + + c = q.comments[0] + assert isinstance(c, Comment) + assert c.author.username == "fxdgear" + + +@pytest.mark.sync +def test_question_answer(write_client: Elasticsearch, question: Question) -> None: + a = question.add_answer(honza, "Just use `elasticsearch-py`!") + + assert isinstance(a, Answer) + + # refresh the index so we can search right away + Question._index.refresh() + + # we can now fetch answers from elasticsearch + answers = question.get_answers() + assert 1 == len(answers) + assert isinstance(answers[0], Answer) + + search = Question.search().query( + "has_child", + type="answer", + inner_hits={}, + query=Q("term", author__username__keyword="honzakral"), + ) + response = search.execute() + + assert 1 == len(response.hits) + + q = response.hits[0] + assert isinstance(q, Question) + assert 1 == len(q.meta.inner_hits.answer.hits) + assert q.meta.inner_hits.answer.hits is q.get_answers() + + a = q.meta.inner_hits.answer.hits[0] + assert isinstance(a, Answer) + assert isinstance(a.get_question(), Question) + assert (a.get_question()).meta.id == "1" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_percolate.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_percolate.py new file mode 100644 index 000000000..925d362c2 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_percolate.py @@ -0,0 +1,37 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from elasticsearch import Elasticsearch + +from ..examples.percolate import BlogPost, setup + + +@pytest.mark.sync +def test_post_gets_tagged_automatically( + write_client: Elasticsearch, +) -> None: + setup() + + bp = BlogPost(_id=47, content="nothing about snakes here!") + bp_py = BlogPost(_id=42, content="something about Python here!") + + bp.save() + bp_py.save() + + assert [] == bp.tags + assert {"programming", "development", "python"} == set(bp_py.tags) diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py new file mode 100644 index 000000000..ff0d0e759 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py @@ -0,0 +1,56 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from hashlib import md5 +from typing import Any, List, Tuple +from unittest import SkipTest + +import pytest +from elasticsearch import Elasticsearch + +from test_elasticsearch.test_dsl.sleep import sleep + +from ..examples import vectors + + +@pytest.mark.sync +def test_vector_search( + write_client: Elasticsearch, es_version: Tuple[int, ...], mocker: Any +) -> None: + # this test only runs on Elasticsearch >= 8.11 because the example uses + # a dense vector without specifying an explicit size + if es_version < (8, 11): + raise SkipTest("This test requires Elasticsearch 8.11 or newer") + + class MockModel: + def __init__(self, model: Any): + pass + + def encode(self, text: str) -> List[float]: + vector = [int(ch) for ch in md5(text.encode()).digest()] + total = sum(vector) + return [float(v) / total for v in vector] + + mocker.patch.object(vectors, "SentenceTransformer", new=MockModel) + + vectors.create() + for i in range(10): + results = (vectors.search("Welcome to our team!")).execute() + if len(results.hits) > 0: + break + sleep(0.1) + assert results[0].name == "New Employee Onboarding Guide" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/async_examples b/test_elasticsearch/test_dsl/test_integration/test_examples/async_examples new file mode 120000 index 000000000..96158259a --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/async_examples @@ -0,0 +1 @@ +../../../../examples/dsl/async \ No newline at end of file diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/examples b/test_elasticsearch/test_dsl/test_integration/test_examples/examples new file mode 120000 index 000000000..ff15b4ebc --- /dev/null +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/examples @@ -0,0 +1 @@ +../../../../examples/dsl \ No newline at end of file diff --git a/test_elasticsearch/test_dsl/test_package.py b/test_elasticsearch/test_dsl/test_package.py new file mode 100644 index 000000000..2e989baa1 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_package.py @@ -0,0 +1,22 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +import elasticsearch.dsl + + +def test__all__is_sorted() -> None: + assert elasticsearch.dsl.__all__ == sorted(elasticsearch.dsl.__all__) diff --git a/test_elasticsearch/test_dsl/test_query.py b/test_elasticsearch/test_dsl/test_query.py new file mode 100644 index 000000000..c09f26b1a --- /dev/null +++ b/test_elasticsearch/test_dsl/test_query.py @@ -0,0 +1,671 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import raises + +from elasticsearch.dsl import function, query, utils + + +def test_empty_Q_is_match_all() -> None: + q = query.Q() + + assert isinstance(q, query.MatchAll) + assert query.MatchAll() == q + + +def test_combined_fields_to_dict() -> None: + assert { + "combined_fields": { + "query": "this is a test", + "fields": ["name", "body", "description"], + "operator": "and", + }, + } == query.CombinedFields( + query="this is a test", + fields=["name", "body", "description"], + operator="and", + ).to_dict() + + +def test_combined_fields_to_dict_extra() -> None: + assert { + "combined_fields": { + "query": "this is a test", + "fields": ["name", "body^2"], + "operator": "or", + }, + } == query.CombinedFields( + query="this is a test", + fields=["name", "body^2"], + operator="or", + ).to_dict() + + +def test_match_to_dict() -> None: + assert {"match": {"f": "value"}} == query.Match(f="value").to_dict() + + +def test_match_to_dict_extra() -> None: + assert {"match": {"f": "value", "boost": 2}} == query.Match( + f="value", boost=2 + ).to_dict() + + +def test_fuzzy_to_dict() -> None: + assert {"fuzzy": {"f": "value"}} == query.Fuzzy(f="value").to_dict() + + +def test_prefix_to_dict() -> None: + assert {"prefix": {"f": "value"}} == query.Prefix(f="value").to_dict() + + +def test_term_to_dict() -> None: + assert {"term": {"_type": "article"}} == query.Term(_type="article").to_dict() + + +def test_terms_to_dict() -> None: + assert {"terms": {"_type": ["article", "section"]}} == query.Terms( + _type=["article", "section"] + ).to_dict() + assert {"terms": {"_type": ["article", "section"], "boost": 1.1}} == query.Terms( + _type=("article", "section"), boost=1.1 + ).to_dict() + assert {"terms": {"_type": "article", "boost": 1.1}} == query.Terms( + _type="article", boost=1.1 + ).to_dict() + assert { + "terms": {"_id": {"index": "my-other-index", "id": "my-id"}, "boost": 1.1} + } == query.Terms( + _id={"index": "my-other-index", "id": "my-id"}, boost=1.1 + ).to_dict() + + +def test_bool_to_dict() -> None: + bool = query.Bool(must=[query.Match(f="value")], should=[]) + + assert {"bool": {"must": [{"match": {"f": "value"}}]}} == bool.to_dict() + + +def test_dismax_to_dict() -> None: + assert {"dis_max": {"queries": [{"term": {"_type": "article"}}]}} == 
query.DisMax( + queries=[query.Term(_type="article")] + ).to_dict() + + +def test_bool_from_dict_issue_318() -> None: + d = {"bool": {"must_not": {"match": {"field": "value"}}}} + q = query.Q(d) + + assert q == ~query.Match(field="value") + + +def test_repr() -> None: + bool = query.Bool(must=[query.Match(f="value")], should=[]) + + assert "Bool(must=[Match(f='value')])" == repr(bool) + + +def test_query_clone() -> None: + bool = query.Bool( + must=[query.Match(x=42)], + should=[query.Match(g="v2")], + must_not=[query.Match(title="value")], + ) + bool_clone = bool._clone() + + assert bool == bool_clone + assert bool is not bool_clone + + +def test_bool_converts_its_init_args_to_queries() -> None: + q = query.Bool(must=[{"match": {"f": "value"}}]) # type: ignore + + assert len(q.must) == 1 + assert q.must[0] == query.Match(f="value") + + +def test_two_queries_make_a_bool() -> None: + q1 = query.Match(f="value1") + q2 = query.Match(message={"query": "this is a test", "opeartor": "and"}) + q = q1 & q2 + + assert isinstance(q, query.Bool) + assert [q1, q2] == q.must + + +def test_other_and_bool_appends_other_to_must() -> None: + q1 = query.Match(f="value1") + qb = query.Bool() + + q = q1 & qb + assert q is not qb + assert q.must[0] == q1 + + +def test_bool_and_other_appends_other_to_must() -> None: + q1 = query.Match(f="value1") + qb = query.Bool() + + q = qb & q1 + assert q is not qb + assert q.must[0] == q1 + + +def test_bool_and_other_sets_min_should_match_if_needed() -> None: + q1 = query.Q("term", category=1) + q2 = query.Q( + "bool", should=[query.Q("term", name="aaa"), query.Q("term", name="bbb")] + ) + + q = q1 & q2 + assert q == query.Bool( + must=[q1], + should=[query.Q("term", name="aaa"), query.Q("term", name="bbb")], + minimum_should_match=1, + ) + + +def test_bool_with_different_minimum_should_match_should_not_be_combined() -> None: + q1 = query.Q( + "bool", + minimum_should_match=2, + should=[ + query.Q("term", field="aa1"), + query.Q("term", field="aa2"), + query.Q("term", field="aa3"), + query.Q("term", field="aa4"), + ], + ) + q2 = query.Q( + "bool", + minimum_should_match=3, + should=[ + query.Q("term", field="bb1"), + query.Q("term", field="bb2"), + query.Q("term", field="bb3"), + query.Q("term", field="bb4"), + ], + ) + q3 = query.Q( + "bool", + minimum_should_match=4, + should=[ + query.Q("term", field="cc1"), + query.Q("term", field="cc2"), + query.Q("term", field="cc3"), + query.Q("term", field="cc4"), + ], + ) + + q4 = q1 | q2 + assert q4 == query.Bool(should=[q1, q2]) + + q5 = q1 | q2 | q3 + assert q5 == query.Bool(should=[q1, q2, q3]) + + +def test_empty_bool_has_min_should_match_0() -> None: + assert 0 == query.Bool()._min_should_match + + +def test_query_and_query_creates_bool() -> None: + q1 = query.Match(f=42) + q2 = query.Match(g=47) + + q = q1 & q2 + assert isinstance(q, query.Bool) + assert q.must == [q1, q2] + + +def test_match_all_and_query_equals_other() -> None: + q1 = query.Match(f=42) + q2 = query.MatchAll() + + q = q1 & q2 + assert q1 == q + + +def test_not_match_all_is_match_none() -> None: + q = query.MatchAll() + + assert ~q == query.MatchNone() + + +def test_not_match_none_is_match_all() -> None: + q = query.MatchNone() + + assert ~q == query.MatchAll() + + +def test_invert_empty_bool_is_match_none() -> None: + q = query.Bool() + + assert ~q == query.MatchNone() + + +def test_match_none_or_query_equals_query() -> None: + q1 = query.Match(f=42) + q2 = query.MatchNone() + + assert q1 | q2 == query.Match(f=42) + + +def 
test_match_none_and_query_equals_match_none() -> None: + q1 = query.Match(f=42) + q2 = query.MatchNone() + + assert q1 & q2 == query.MatchNone() + + +def test_bool_and_bool() -> None: + qt1, qt2, qt3 = query.Match(f=1), query.Match(f=2), query.Match(f=3) + + q1 = query.Bool(must=[qt1], should=[qt2]) + q2 = query.Bool(must_not=[qt3]) + assert q1 & q2 == query.Bool( + must=[qt1], must_not=[qt3], should=[qt2], minimum_should_match=0 + ) + + q1 = query.Bool(must=[qt1], should=[qt1, qt2]) + q2 = query.Bool(should=[qt3]) + assert q1 & q2 == query.Bool( + must=[qt1, qt3], should=[qt1, qt2], minimum_should_match=0 + ) + + +def test_bool_and_bool_with_min_should_match() -> None: + qt1, qt2 = query.Match(f=1), query.Match(f=2) + q1 = query.Q("bool", minimum_should_match=1, should=[qt1]) + q2 = query.Q("bool", minimum_should_match=1, should=[qt2]) + + assert query.Q("bool", must=[qt1, qt2]) == q1 & q2 + + +def test_negative_min_should_match() -> None: + qt1, qt2 = query.Match(f=1), query.Match(f=2) + q1 = query.Q("bool", minimum_should_match=-2, should=[qt1]) + q2 = query.Q("bool", minimum_should_match=1, should=[qt2]) + + with raises(ValueError): + q1 & q2 + with raises(ValueError): + q2 & q1 + + +def test_percentage_min_should_match() -> None: + qt1, qt2 = query.Match(f=1), query.Match(f=2) + q1 = query.Q("bool", minimum_should_match="50%", should=[qt1]) + q2 = query.Q("bool", minimum_should_match=1, should=[qt2]) + + with raises(ValueError): + q1 & q2 + with raises(ValueError): + q2 & q1 + + +def test_inverted_query_becomes_bool_with_must_not() -> None: + q = query.Match(f=42) + + assert ~q == query.Bool(must_not=[query.Match(f=42)]) + + +def test_inverted_query_with_must_not_become_should() -> None: + q = query.Q("bool", must_not=[query.Q("match", f=1), query.Q("match", f=2)]) + + assert ~q == query.Q("bool", should=[query.Q("match", f=1), query.Q("match", f=2)]) + + +def test_inverted_query_with_must_and_must_not() -> None: + q = query.Q( + "bool", + must=[query.Q("match", f=3), query.Q("match", f=4)], + must_not=[query.Q("match", f=1), query.Q("match", f=2)], + ) + print((~q).to_dict()) + assert ~q == query.Q( + "bool", + should=[ + # negation of must + query.Q("bool", must_not=[query.Q("match", f=3)]), + query.Q("bool", must_not=[query.Q("match", f=4)]), + # negation of must_not + query.Q("match", f=1), + query.Q("match", f=2), + ], + ) + + +def test_double_invert_returns_original_query() -> None: + q = query.Match(f=42) + + assert q == ~~q + + +def test_bool_query_gets_inverted_internally() -> None: + q = query.Bool(must_not=[query.Match(f=42)], must=[query.Match(g="v")]) + + assert ~q == query.Bool( + should=[ + # negating must + query.Bool(must_not=[query.Match(g="v")]), + # negating must_not + query.Match(f=42), + ] + ) + + +def test_match_all_or_something_is_match_all() -> None: + q1 = query.MatchAll() + q2 = query.Match(f=42) + + assert (q1 | q2) == query.MatchAll() + assert (q2 | q1) == query.MatchAll() + + +def test_or_produces_bool_with_should() -> None: + q1 = query.Match(f=42) + q2 = query.Match(g="v") + + q = q1 | q2 + assert q == query.Bool(should=[q1, q2]) + + +def test_or_bool_doesnt_loop_infinitely_issue_37() -> None: + q = query.Match(f=42) | ~query.Match(f=47) + + assert q == query.Bool( + should=[query.Bool(must_not=[query.Match(f=47)]), query.Match(f=42)] + ) + + +def test_or_bool_doesnt_loop_infinitely_issue_96() -> None: + q = ~query.Match(f=42) | ~query.Match(f=47) + + assert q == query.Bool( + should=[ + query.Bool(must_not=[query.Match(f=42)]), + 
query.Bool(must_not=[query.Match(f=47)]), + ] + ) + + +def test_bool_will_append_another_query_with_or() -> None: + qb = query.Bool(should=[query.Match(f="v"), query.Match(f="v2")]) + q = query.Match(g=42) + + assert (q | qb) == query.Bool(should=[query.Match(f="v"), query.Match(f="v2"), q]) + + +def test_bool_queries_with_only_should_get_concatenated() -> None: + q1 = query.Bool(should=[query.Match(f=1), query.Match(f=2)]) + q2 = query.Bool(should=[query.Match(f=3), query.Match(f=4)]) + + assert (q1 | q2) == query.Bool( + should=[query.Match(f=1), query.Match(f=2), query.Match(f=3), query.Match(f=4)] + ) + + +def test_two_bool_queries_append_one_to_should_if_possible() -> None: + q1 = query.Bool(should=[query.Match(f="v")]) + q2 = query.Bool(must=[query.Match(f="v")]) + + assert (q1 | q2) == query.Bool( + should=[query.Match(f="v"), query.Bool(must=[query.Match(f="v")])] + ) + assert (q2 | q1) == query.Bool( + should=[query.Match(f="v"), query.Bool(must=[query.Match(f="v")])] + ) + + +def test_queries_are_registered() -> None: + assert "match" in query.Query._classes + assert query.Query._classes["match"] is query.Match + + +def test_defining_query_registers_it() -> None: + class MyQuery(query.Query): + name = "my_query" + + assert "my_query" in query.Query._classes + assert query.Query._classes["my_query"] is MyQuery + + +def test_Q_passes_query_through() -> None: + q = query.Match(f="value1") + + assert query.Q(q) is q + + +def test_Q_constructs_query_by_name() -> None: + q = query.Q("match", f="value") + + assert isinstance(q, query.Match) + assert {"f": "value"} == q._params + + +def test_Q_translates_double_underscore_to_dots_in_param_names() -> None: + q = query.Q("match", comment__author="honza") + + assert {"comment.author": "honza"} == q._params + + +def test_Q_doesn_translate_double_underscore_to_dots_in_param_names() -> None: + q = query.Q("match", comment__author="honza", _expand__to_dot=False) + + assert {"comment__author": "honza"} == q._params + + +def test_Q_constructs_simple_query_from_dict() -> None: + q = query.Q({"match": {"f": "value"}}) + + assert isinstance(q, query.Match) + assert {"f": "value"} == q._params + + +def test_Q_constructs_compound_query_from_dict() -> None: + q = query.Q({"bool": {"must": [{"match": {"f": "value"}}]}}) + + assert q == query.Bool(must=[query.Match(f="value")]) + + +def test_Q_raises_error_when_passed_in_dict_and_params() -> None: + with raises(Exception): + # Ignore types as it's not a valid call + query.Q({"match": {"f": "value"}}, f="value") # type: ignore[call-overload] + + +def test_Q_raises_error_when_passed_in_query_and_params() -> None: + q = query.Match(f="value1") + + with raises(Exception): + # Ignore types as it's not a valid call signature + query.Q(q, f="value") # type: ignore[call-overload] + + +def test_Q_raises_error_on_unknown_query() -> None: + with raises(Exception): + query.Q("not a query", f="value") + + +def test_match_all_and_anything_is_anything() -> None: + q = query.MatchAll() + + s = query.Match(f=42) + assert q & s == s + assert s & q == s + + +def test_function_score_with_functions() -> None: + q = query.Q( + "function_score", + functions=[query.SF("script_score", script="doc['comment_count'] * _score")], + ) + + assert { + "function_score": { + "functions": [{"script_score": {"script": "doc['comment_count'] * _score"}}] + } + } == q.to_dict() + + +def test_function_score_with_no_function_is_boost_factor() -> None: + q = query.Q( + "function_score", + functions=[query.SF({"weight": 20, "filter": 
query.Q("term", f=42)})], + ) + + assert { + "function_score": {"functions": [{"filter": {"term": {"f": 42}}, "weight": 20}]} + } == q.to_dict() + + +def test_function_score_to_dict() -> None: + q = query.Q( + "function_score", + query=query.Q("match", title="python"), + functions=[ + query.SF("random_score"), + query.SF( + "field_value_factor", + field="comment_count", + filter=query.Q("term", tags="python"), + ), + ], + ) + + d = { + "function_score": { + "query": {"match": {"title": "python"}}, + "functions": [ + {"random_score": {}}, + { + "filter": {"term": {"tags": "python"}}, + "field_value_factor": {"field": "comment_count"}, + }, + ], + } + } + assert d == q.to_dict() + + +def test_function_score_class_based_to_dict() -> None: + q = query.FunctionScore( + query=query.Match(title="python"), + functions=[ + function.RandomScore(), + function.FieldValueFactor( + field="comment_count", + filter=query.Term(tags="python"), + ), + ], + ) + + d = { + "function_score": { + "query": {"match": {"title": "python"}}, + "functions": [ + {"random_score": {}}, + { + "filter": {"term": {"tags": "python"}}, + "field_value_factor": {"field": "comment_count"}, + }, + ], + } + } + assert d == q.to_dict() + + +def test_function_score_with_single_function() -> None: + d = { + "function_score": { + "filter": {"term": {"tags": "python"}}, + "script_score": {"script": "doc['comment_count'] * _score"}, + } + } + + q = query.Q(d) + assert isinstance(q, query.FunctionScore) + assert isinstance(q.filter, query.Term) + assert len(q.functions) == 1 + + sf = q.functions[0] + assert isinstance(sf, function.ScriptScore) + assert "doc['comment_count'] * _score" == sf.script + + +def test_function_score_from_dict() -> None: + d = { + "function_score": { + "filter": {"term": {"tags": "python"}}, + "functions": [ + { + "filter": {"terms": {"tags": "python"}}, + "script_score": {"script": "doc['comment_count'] * _score"}, + }, + {"boost_factor": 6}, + ], + } + } + + q = query.Q(d) + assert isinstance(q, query.FunctionScore) + assert isinstance(q.filter, query.Term) + assert len(q.functions) == 2 + + sf = q.functions[0] + assert isinstance(sf, function.ScriptScore) + assert isinstance(sf.filter, query.Terms) + + sf = q.functions[1] + assert isinstance(sf, function.BoostFactor) + assert 6 == sf.value + assert {"boost_factor": 6} == sf.to_dict() + + +def test_script_score() -> None: + d = { + "script_score": { + "query": {"match_all": {}}, + "script": {"source": "...", "params": {}}, + } + } + q = query.Q(d) + + assert isinstance(q, query.ScriptScore) + assert isinstance(q.query, query.MatchAll) + assert q.script == {"source": "...", "params": {}} + assert q.to_dict() == d + + +def test_expand_double_underscore_to_dot_setting() -> None: + q = query.Term(comment__count=2) + assert q.to_dict() == {"term": {"comment.count": 2}} + utils.EXPAND__TO_DOT = False + q = query.Term(comment__count=2) + assert q.to_dict() == {"term": {"comment__count": 2}} + utils.EXPAND__TO_DOT = True + + +def test_knn_query() -> None: + q = query.Knn(field="image-vector", query_vector=[-5, 9, -12], num_candidates=10) + assert q.to_dict() == { + "knn": { + "field": "image-vector", + "query_vector": [-5, 9, -12], + "num_candidates": 10, + } + } diff --git a/test_elasticsearch/test_dsl/test_result.py b/test_elasticsearch/test_dsl/test_result.py new file mode 100644 index 000000000..46707c715 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_result.py @@ -0,0 +1,215 @@ +# Licensed to Elasticsearch B.V. 
under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pickle
+from datetime import date
+from typing import Any, Dict
+
+from pytest import fixture, raises
+
+from elasticsearch.dsl import Date, Document, Object, Search, response
+from elasticsearch.dsl.aggs import Terms
+from elasticsearch.dsl.response.aggs import AggResponse, Bucket, BucketData
+from elasticsearch.dsl.utils import AttrDict
+
+
+@fixture
+def agg_response(aggs_search: Search, aggs_data: Dict[str, Any]) -> response.Response:
+    return response.Response(aggs_search, aggs_data)
+
+
+def test_agg_response_is_pickleable(agg_response: response.Response) -> None:
+    agg_response.hits
+    r = pickle.loads(pickle.dumps(agg_response))
+
+    assert r == agg_response
+    assert r._search == agg_response._search
+    assert r.hits == agg_response.hits
+
+
+def test_response_is_pickleable(dummy_response: Dict[str, Any]) -> None:
+    res = response.Response(Search(), dummy_response.body)  # type: ignore[attr-defined]
+    res.hits
+    r = pickle.loads(pickle.dumps(res))
+
+    assert r == res
+    assert r._search == res._search
+    assert r.hits == res.hits
+
+
+def test_hit_is_pickleable(dummy_response: Dict[str, Any]) -> None:
+    res = response.Response(Search(), dummy_response)
+    hits = pickle.loads(pickle.dumps(res.hits))
+
+    assert hits == res.hits
+    assert hits[0].meta == res.hits[0].meta
+
+
+def test_response_stores_search(dummy_response: Dict[str, Any]) -> None:
+    s = Search()
+    r = response.Response(s, dummy_response)
+
+    assert r._search is s
+
+
+def test_attribute_error_in_hits_is_not_hidden(dummy_response: Dict[str, Any]) -> None:
+    def f(hit: AttrDict[Any]) -> Any:
+        raise AttributeError()
+
+    s = Search().doc_type(employee=f)
+    r = response.Response(s, dummy_response)
+    with raises(TypeError):
+        r.hits
+
+
+def test_interactive_helpers(dummy_response: Dict[str, Any]) -> None:
+    res = response.Response(Search(), dummy_response)
+    hits = res.hits
+    h = hits[0]
+
+    rhits = (
+        "[<Hit(test-index/elasticsearch): {}>, <Hit(test-index/42): {}...}}>, "
+        "<Hit(test-index/47): {}...}}>, <Hit(test-index/53): {{}}>]"
+    ).format(
+        repr(dummy_response["hits"]["hits"][0]["_source"]),
+        repr(dummy_response["hits"]["hits"][1]["_source"])[:60],
+        repr(dummy_response["hits"]["hits"][2]["_source"])[:60],
+    )
+
+    assert res
+    assert f"<Response: {rhits}>" == repr(res)
+    assert rhits == repr(hits)
+    assert {"meta", "city", "name"} == set(dir(h))
+    assert "<Hit(test-index/elasticsearch): %r>" % dummy_response["hits"]["hits"][0][
+        "_source"
+    ] == repr(h)
+
+
+def test_empty_response_is_false(dummy_response: Dict[str, Any]) -> None:
+    dummy_response["hits"]["hits"] = []
+    res = response.Response(Search(), dummy_response)
+
+    assert not res
+
+
+def test_len_response(dummy_response: Dict[str, Any]) -> None:
+    res = response.Response(Search(), dummy_response)
+    assert len(res) == 4
+
+
+def test_iterating_over_response_gives_you_hits(dummy_response: Dict[str, Any]) -> None:
+    res = response.Response(Search(), dummy_response)
+    hits = list(h
for h in res) + + assert res.success() + assert 123 == res.took + assert 4 == len(hits) + assert all(isinstance(h, response.Hit) for h in hits) + h = hits[0] + + assert "test-index" == h.meta.index + assert "company" == h.meta.doc_type + assert "elasticsearch" == h.meta.id + assert 12 == h.meta.score + + assert hits[1].meta.routing == "elasticsearch" + + +def test_hits_get_wrapped_to_contain_additional_attrs( + dummy_response: Dict[str, Any] +) -> None: + res = response.Response(Search(), dummy_response) + hits = res.hits + + assert 123 == hits.total # type: ignore[attr-defined] + assert 12.0 == hits.max_score # type: ignore[attr-defined] + + +def test_hits_provide_dot_and_bracket_access_to_attrs( + dummy_response: Dict[str, Any] +) -> None: + res = response.Response(Search(), dummy_response) + h = res.hits[0] + + assert "Elasticsearch" == h.name + assert "Elasticsearch" == h["name"] + + assert "Honza" == res.hits[2].name.first + + with raises(KeyError): + h["not_there"] + + with raises(AttributeError): + h.not_there + + +def test_slicing_on_response_slices_on_hits(dummy_response: Dict[str, Any]) -> None: + res = response.Response(Search(), dummy_response) + + assert res[0] is res.hits[0] + assert res[::-1] == res.hits[::-1] + + +def test_aggregation_base(agg_response: response.Response) -> None: + assert agg_response.aggs is agg_response.aggregations + assert isinstance(agg_response.aggs, response.AggResponse) + + +def test_metric_agg_works(agg_response: response.Response) -> None: + assert 25052.0 == agg_response.aggs.sum_lines.value + + +def test_aggregations_can_be_iterated_over(agg_response: response.Response) -> None: + aggs = [a for a in agg_response.aggs] + + assert len(aggs) == 3 + assert all(map(lambda a: isinstance(a, AggResponse), aggs)) + + +def test_aggregations_can_be_retrieved_by_name( + agg_response: response.Response, aggs_search: Search +) -> None: + a = agg_response.aggs["popular_files"] + + assert isinstance(a, BucketData) + assert isinstance(a._meta["aggs"], Terms) + assert a._meta["aggs"] is aggs_search.aggs.aggs["popular_files"] + + +def test_bucket_response_can_be_iterated_over(agg_response: response.Response) -> None: + popular_files = agg_response.aggregations.popular_files + + buckets = [b for b in popular_files] + assert all(isinstance(b, Bucket) for b in buckets) + assert buckets == popular_files.buckets + + +def test_bucket_keys_get_deserialized( + aggs_data: Dict[str, Any], aggs_search: Search +) -> None: + class Commit(Document): + info = Object(properties={"committed_date": Date()}) + + class Index: + name = "test-commit" + + aggs_search = aggs_search.doc_type(Commit) + agg_response = response.Response(aggs_search, aggs_data) + + per_month = agg_response.aggregations.per_month + for b in per_month: + assert isinstance(b.key, date) diff --git a/test_elasticsearch/test_dsl/test_utils.py b/test_elasticsearch/test_dsl/test_utils.py new file mode 100644 index 000000000..ac4d6df6e --- /dev/null +++ b/test_elasticsearch/test_dsl/test_utils.py @@ -0,0 +1,136 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pickle +from typing import Any, Dict, Tuple + +from pytest import raises + +from elasticsearch.dsl import Q, serializer, utils + + +def test_attrdict_pickle() -> None: + ad: utils.AttrDict[str] = utils.AttrDict({}) + + pickled_ad = pickle.dumps(ad) + assert ad == pickle.loads(pickled_ad) + + +def test_attrlist_pickle() -> None: + al = utils.AttrList[Any]([]) + + pickled_al = pickle.dumps(al) + assert al == pickle.loads(pickled_al) + + +def test_attrlist_slice() -> None: + class MyAttrDict(utils.AttrDict[str]): + pass + + l = utils.AttrList[Any]([{}, {}], obj_wrapper=MyAttrDict) + assert isinstance(l[:][0], MyAttrDict) + + +def test_attrlist_with_type_argument() -> None: + a = utils.AttrList[str](["a", "b"]) + assert list(a) == ["a", "b"] + + +def test_attrdict_keys_items() -> None: + a = utils.AttrDict({"a": {"b": 42, "c": 47}, "d": "e"}) + assert list(a.keys()) == ["a", "d"] + assert list(a.items()) == [("a", {"b": 42, "c": 47}), ("d", "e")] + + +def test_attrdict_with_type_argument() -> None: + a = utils.AttrDict[str]({"a": "b"}) + assert list(a.keys()) == ["a"] + assert list(a.items()) == [("a", "b")] + + +def test_merge() -> None: + a: utils.AttrDict[Any] = utils.AttrDict({"a": {"b": 42, "c": 47}}) + b = {"a": {"b": 123, "d": -12}, "e": [1, 2, 3]} + + utils.merge(a, b) + + assert a == {"a": {"b": 123, "c": 47, "d": -12}, "e": [1, 2, 3]} + + +def test_merge_conflict() -> None: + data: Tuple[Dict[str, Any], ...] 
= ( + {"a": 42}, + {"a": {"b": 47}}, + ) + for d in data: + utils.merge({"a": {"b": 42}}, d) + with raises(ValueError): + utils.merge({"a": {"b": 42}}, d, True) + + +def test_attrdict_bool() -> None: + d: utils.AttrDict[str] = utils.AttrDict({}) + + assert not d + d.title = "Title" + assert d + + +def test_attrlist_items_get_wrapped_during_iteration() -> None: + al = utils.AttrList([1, object(), [1], {}]) + + l = list(iter(al)) + + assert isinstance(l[2], utils.AttrList) + assert isinstance(l[3], utils.AttrDict) + + +def test_serializer_deals_with_Attr_versions() -> None: + d = utils.AttrDict({"key": utils.AttrList([1, 2, 3])}) + + assert serializer.serializer.dumps(d) == serializer.serializer.dumps( + {"key": [1, 2, 3]} + ) + + +def test_serializer_deals_with_objects_with_to_dict() -> None: + class MyClass: + def to_dict(self) -> int: + return 42 + + assert serializer.serializer.dumps(MyClass()) == b"42" + + +def test_recursive_to_dict() -> None: + assert utils.recursive_to_dict({"k": [1, (1.0, {"v": Q("match", key="val")})]}) == { + "k": [1, (1.0, {"v": {"match": {"key": "val"}}})] + } + + +def test_attrlist_to_list() -> None: + l = utils.AttrList[Any]([{}, {}]).to_list() + assert isinstance(l, list) + assert l == [{}, {}] + + +def test_attrdict_with_reserved_keyword() -> None: + d = utils.AttrDict({"from": 10, "size": 20}) + assert d.from_ == 10 + assert d.size == 20 + d = utils.AttrDict({}) + d.from_ = 10 + assert {"from": 10} == d.to_dict() diff --git a/test_elasticsearch/test_dsl/test_validation.py b/test_elasticsearch/test_dsl/test_validation.py new file mode 100644 index 000000000..e14550eba --- /dev/null +++ b/test_elasticsearch/test_dsl/test_validation.py @@ -0,0 +1,162 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from datetime import datetime +from typing import Any + +from pytest import raises + +from elasticsearch.dsl import ( + Date, + Document, + InnerDoc, + Integer, + Nested, + Object, + Text, + mapped_field, +) +from elasticsearch.dsl.exceptions import ValidationException + + +class Author(InnerDoc): + name: str + email: str + + def clean(self) -> None: + if not self.name: + raise ValidationException("name is missing") + if not self.email: + raise ValidationException("email is missing") + elif self.name.lower() not in self.email: + raise ValidationException("Invalid email!") + + +class BlogPost(Document): + authors = Nested(Author, required=True) + created = Date() + inner = Object() + + +class BlogPostWithStatus(Document): + published: bool = mapped_field(init=False) + + +class AutoNowDate(Date): + def clean(self, data: Any) -> Any: + if data is None: + data = datetime.now() + return super().clean(data) + + +class Log(Document): + timestamp = AutoNowDate(required=True) + data = Text() + + +def test_required_int_can_be_0() -> None: + class DT(Document): + i = Integer(required=True) + + dt = DT(i=0) + dt.full_clean() + + +def test_required_field_cannot_be_empty_list() -> None: + class DT(Document): + i = Integer(required=True) + + dt = DT(i=[]) + with raises(ValidationException): + dt.full_clean() + + +def test_validation_works_for_lists_of_values() -> None: + class DT(Document): + i = Date(required=True) + + dt = DT(i=[datetime.now(), "not date"]) + with raises(ValidationException): + dt.full_clean() + + dt = DT(i=[datetime.now(), datetime.now()]) + dt.full_clean() + + +def test_field_with_custom_clean() -> None: + l = Log() + l.full_clean() + + assert isinstance(l.timestamp, datetime) + + +def test_empty_object() -> None: + d = BlogPost(authors=[{"name": "Honza", "email": "honza@elastic.co"}]) + d.inner = {} # type: ignore[assignment] + + d.full_clean() + + +def test_missing_required_field_raises_validation_exception() -> None: + d = BlogPost() + with raises(ValidationException): + d.full_clean() + + d = BlogPost() + d.authors.append({"name": "Honza"}) + with raises(ValidationException): + d.full_clean() + + d = BlogPost() + d.authors.append({"name": "Honza", "email": "honza@elastic.co"}) + d.full_clean() + + +def test_boolean_doesnt_treat_false_as_empty() -> None: + d = BlogPostWithStatus() + with raises(ValidationException): + d.full_clean() + d.published = False + d.full_clean() + d.published = True + d.full_clean() + + +def test_custom_validation_on_nested_gets_run() -> None: + d = BlogPost(authors=[Author(name="Honza", email="king@example.com")], created=None) + + assert isinstance(d.authors[0], Author) # type: ignore[index] + + with raises(ValidationException): + d.full_clean() + + +def test_accessing_known_fields_returns_empty_value() -> None: + d = BlogPost() + + assert [] == d.authors + + d.authors.append({}) + assert None is d.authors[0].name # type: ignore[index] + assert None is d.authors[0].email + + +def test_empty_values_are_not_serialized() -> None: + d = BlogPost(authors=[{"name": "Honza", "email": "honza@elastic.co"}], created=None) + + d.full_clean() + assert d.to_dict() == {"authors": [{"name": "Honza", "email": "honza@elastic.co"}]} diff --git a/test_elasticsearch/test_dsl/test_wrappers.py b/test_elasticsearch/test_dsl/test_wrappers.py new file mode 100644 index 000000000..8af6652a8 --- /dev/null +++ b/test_elasticsearch/test_dsl/test_wrappers.py @@ -0,0 +1,111 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime, timedelta +from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence + +if TYPE_CHECKING: + from _operator import _SupportsComparison + +import pytest + +from elasticsearch.dsl import Range + + +@pytest.mark.parametrize( + "kwargs, item", + [ + ({}, 1), + ({}, -1), + ({"gte": -1}, -1), + ({"lte": 4}, 4), + ({"lte": 4, "gte": 2}, 4), + ({"lte": 4, "gte": 2}, 2), + ({"gt": datetime.now() - timedelta(seconds=10)}, datetime.now()), + ], +) +def test_range_contains( + kwargs: Mapping[str, "_SupportsComparison"], item: "_SupportsComparison" +) -> None: + assert item in Range(**kwargs) + + +@pytest.mark.parametrize( + "kwargs, item", + [ + ({"gt": -1}, -1), + ({"lt": 4}, 4), + ({"lt": 4}, 42), + ({"lte": 4, "gte": 2}, 1), + ({"lte": datetime.now() - timedelta(seconds=10)}, datetime.now()), + ], +) +def test_range_not_contains( + kwargs: Mapping[str, "_SupportsComparison"], item: "_SupportsComparison" +) -> None: + assert item not in Range(**kwargs) + + +@pytest.mark.parametrize( + "args,kwargs", + [ + (({},), {"lt": 42}), + ((), {"not_lt": 42}), + ((object(),), {}), + ((), {"lt": 1, "lte": 1}), + ((), {"gt": 1, "gte": 1}), + ], +) +def test_range_raises_value_error_on_wrong_params( + args: Sequence[Any], kwargs: Mapping[str, "_SupportsComparison"] +) -> None: + with pytest.raises(ValueError): + Range(*args, **kwargs) + + +@pytest.mark.parametrize( + "range,lower,inclusive", + [ + (Range(gt=1), 1, False), + (Range(gte=1), 1, True), + (Range(), None, False), + (Range(lt=42), None, False), + ], +) +def test_range_lower( + range: Range["_SupportsComparison"], + lower: Optional["_SupportsComparison"], + inclusive: bool, +) -> None: + assert (lower, inclusive) == range.lower + + +@pytest.mark.parametrize( + "range,upper,inclusive", + [ + (Range(lt=1), 1, False), + (Range(lte=1), 1, True), + (Range(), None, False), + (Range(gt=42), None, False), + ], +) +def test_range_upper( + range: Range["_SupportsComparison"], + upper: Optional["_SupportsComparison"], + inclusive: bool, +) -> None: + assert (upper, inclusive) == range.upper From 4ef877eaadc35889cddf8f7a9f84ec98e497ee02 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Mon, 13 Jan 2025 15:55:23 +0000 Subject: [PATCH 02/11] reformat code --- elasticsearch/dsl/_async/document.py | 3 +- elasticsearch/dsl/_async/faceted_search.py | 1 - elasticsearch/dsl/_async/index.py | 1 + elasticsearch/dsl/_async/search.py | 3 +- elasticsearch/dsl/_sync/document.py | 3 +- elasticsearch/dsl/_sync/faceted_search.py | 1 - elasticsearch/dsl/_sync/index.py | 1 + elasticsearch/dsl/_sync/search.py | 3 +- elasticsearch/dsl/aggs.py | 1 - elasticsearch/dsl/document_base.py | 4 +-- elasticsearch/dsl/query.py | 1 - elasticsearch/dsl/search.py | 12 ++----- elasticsearch/dsl/utils.py | 1 + examples/dsl/async/composite_agg.py | 3 +- 
examples/dsl/async/sparse_vectors.py | 2 +- examples/dsl/async/vectors.py | 2 +- examples/dsl/composite_agg.py | 3 +- examples/dsl/sparse_vectors.py | 2 +- examples/dsl/vectors.py | 2 +- noxfile.py | 34 +++++++++++++++++-- setup.cfg | 2 +- test_elasticsearch/test_dsl/conftest.py | 6 ++-- .../test_dsl/test_connections.py | 2 +- .../test_integration/_async/test_analysis.py | 2 +- .../test_integration/_async/test_document.py | 4 +-- .../_async/test_faceted_search.py | 2 +- .../test_integration/_async/test_index.py | 2 +- .../test_integration/_async/test_mapping.py | 2 +- .../test_integration/_async/test_search.py | 2 +- .../_async/test_update_by_query.py | 2 +- .../test_integration/_sync/test_analysis.py | 2 +- .../test_integration/_sync/test_document.py | 4 +-- .../_sync/test_faceted_search.py | 2 +- .../test_integration/_sync/test_index.py | 2 +- .../test_integration/_sync/test_mapping.py | 2 +- .../test_integration/_sync/test_search.py | 2 +- .../_sync/test_update_by_query.py | 2 +- .../test_dsl/test_integration/test_count.py | 1 - .../_async/test_alias_migration.py | 1 + .../test_examples/_async/test_completion.py | 1 + .../_async/test_composite_aggs.py | 2 +- .../test_examples/_async/test_parent_child.py | 2 +- .../test_examples/_async/test_percolate.py | 1 + .../test_examples/_async/test_vectors.py | 2 +- .../_sync/test_alias_migration.py | 1 + .../test_examples/_sync/test_completion.py | 1 + .../_sync/test_composite_aggs.py | 2 +- .../test_examples/_sync/test_parent_child.py | 2 +- .../test_examples/_sync/test_percolate.py | 1 + .../test_examples/_sync/test_vectors.py | 2 +- .../test_vectorstore/test_vectorstore.py | 4 +-- 51 files changed, 87 insertions(+), 61 deletions(-) diff --git a/elasticsearch/dsl/_async/document.py b/elasticsearch/dsl/_async/document.py index 3f5d69f11..c7ece2dea 100644 --- a/elasticsearch/dsl/_async/document.py +++ b/elasticsearch/dsl/_async/document.py @@ -28,9 +28,10 @@ cast, ) +from typing_extensions import Self, dataclass_transform + from elasticsearch.exceptions import NotFoundError, RequestError from elasticsearch.helpers import async_bulk -from typing_extensions import Self, dataclass_transform from .._async.index import AsyncIndex from ..async_connections import get_connection diff --git a/elasticsearch/dsl/_async/faceted_search.py b/elasticsearch/dsl/_async/faceted_search.py index 199dcfca1..545392254 100644 --- a/elasticsearch/dsl/_async/faceted_search.py +++ b/elasticsearch/dsl/_async/faceted_search.py @@ -18,7 +18,6 @@ from typing import TYPE_CHECKING from ..faceted_search_base import FacetedResponse, FacetedSearchBase - from ..utils import _R from .search import AsyncSearch diff --git a/elasticsearch/dsl/_async/index.py b/elasticsearch/dsl/_async/index.py index 71542dffd..58369579b 100644 --- a/elasticsearch/dsl/_async/index.py +++ b/elasticsearch/dsl/_async/index.py @@ -29,6 +29,7 @@ if TYPE_CHECKING: from elastic_transport import ObjectApiResponse + from elasticsearch import AsyncElasticsearch diff --git a/elasticsearch/dsl/_async/search.py b/elasticsearch/dsl/_async/search.py index ea6288622..42eb142fd 100644 --- a/elasticsearch/dsl/_async/search.py +++ b/elasticsearch/dsl/_async/search.py @@ -27,9 +27,10 @@ cast, ) +from typing_extensions import Self + from elasticsearch.exceptions import ApiError from elasticsearch.helpers import async_scan -from typing_extensions import Self from ..async_connections import get_connection from ..response import Response diff --git a/elasticsearch/dsl/_sync/document.py b/elasticsearch/dsl/_sync/document.py 
index c8143412f..3444563ad 100644 --- a/elasticsearch/dsl/_sync/document.py +++ b/elasticsearch/dsl/_sync/document.py @@ -28,9 +28,10 @@ cast, ) +from typing_extensions import Self, dataclass_transform + from elasticsearch.exceptions import NotFoundError, RequestError from elasticsearch.helpers import bulk -from typing_extensions import Self, dataclass_transform from .._sync.index import Index from ..connections import get_connection diff --git a/elasticsearch/dsl/_sync/faceted_search.py b/elasticsearch/dsl/_sync/faceted_search.py index 115492c7a..4bdac90de 100644 --- a/elasticsearch/dsl/_sync/faceted_search.py +++ b/elasticsearch/dsl/_sync/faceted_search.py @@ -18,7 +18,6 @@ from typing import TYPE_CHECKING from ..faceted_search_base import FacetedResponse, FacetedSearchBase - from ..utils import _R from .search import Search diff --git a/elasticsearch/dsl/_sync/index.py b/elasticsearch/dsl/_sync/index.py index 171f70bfb..b2d5830d9 100644 --- a/elasticsearch/dsl/_sync/index.py +++ b/elasticsearch/dsl/_sync/index.py @@ -29,6 +29,7 @@ if TYPE_CHECKING: from elastic_transport import ObjectApiResponse + from elasticsearch import Elasticsearch diff --git a/elasticsearch/dsl/_sync/search.py b/elasticsearch/dsl/_sync/search.py index f3e028347..f46364a67 100644 --- a/elasticsearch/dsl/_sync/search.py +++ b/elasticsearch/dsl/_sync/search.py @@ -18,9 +18,10 @@ import contextlib from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, cast +from typing_extensions import Self + from elasticsearch.exceptions import ApiError from elasticsearch.helpers import scan -from typing_extensions import Self from ..connections import get_connection from ..response import Response diff --git a/elasticsearch/dsl/aggs.py b/elasticsearch/dsl/aggs.py index 6175027d7..6c51c3ace 100644 --- a/elasticsearch/dsl/aggs.py +++ b/elasticsearch/dsl/aggs.py @@ -43,7 +43,6 @@ from elastic_transport.client_utils import DefaultType from . import types - from .document_base import InstrumentedField from .search_base import SearchBase diff --git a/elasticsearch/dsl/document_base.py b/elasticsearch/dsl/document_base.py index a7026778a..b4f8d67e5 100644 --- a/elasticsearch/dsl/document_base.py +++ b/elasticsearch/dsl/document_base.py @@ -34,9 +34,9 @@ ) try: - from types import UnionType # type: ignore[attr-defined] + from types import UnionType except ImportError: - UnionType = None + UnionType = None # type: ignore from typing_extensions import dataclass_transform diff --git a/elasticsearch/dsl/query.py b/elasticsearch/dsl/query.py index 1b3d9f22b..0a7d288c8 100644 --- a/elasticsearch/dsl/query.py +++ b/elasticsearch/dsl/query.py @@ -49,7 +49,6 @@ from elastic_transport.client_utils import DefaultType from . import types, wrappers - from .document_base import InstrumentedField _T = TypeVar("_T") diff --git a/elasticsearch/dsl/search.py b/elasticsearch/dsl/search.py index eea200e00..bf2036ffd 100644 --- a/elasticsearch/dsl/search.py +++ b/elasticsearch/dsl/search.py @@ -15,14 +15,6 @@ # specific language governing permissions and limitations # under the License. 
-from ._async.search import ( # noqa: F401 - AsyncEmptySearch, - AsyncMultiSearch, - AsyncSearch, -) -from ._sync.search import ( # noqa: F401 - EmptySearch, - MultiSearch, - Search, -) +from ._async.search import AsyncEmptySearch, AsyncMultiSearch, AsyncSearch # noqa: F401 +from ._sync.search import EmptySearch, MultiSearch, Search # noqa: F401 from .search_base import Q # noqa: F401 diff --git a/elasticsearch/dsl/utils.py b/elasticsearch/dsl/utils.py index b425f79a4..fb5848e20 100644 --- a/elasticsearch/dsl/utils.py +++ b/elasticsearch/dsl/utils.py @@ -43,6 +43,7 @@ if TYPE_CHECKING: from elastic_transport import ObjectApiResponse + from elasticsearch import AsyncElasticsearch, Elasticsearch from .document_base import DocumentOptions diff --git a/examples/dsl/async/composite_agg.py b/examples/dsl/async/composite_agg.py index f9a7640a3..e6ea62cc6 100644 --- a/examples/dsl/async/composite_agg.py +++ b/examples/dsl/async/composite_agg.py @@ -19,10 +19,9 @@ import os from typing import Any, AsyncIterator, Dict, Mapping, Sequence, cast -from elasticsearch.helpers import async_bulk - from elasticsearch.dsl import Agg, AsyncSearch, Response, aggs, async_connections from elasticsearch.dsl.types import CompositeAggregate +from elasticsearch.helpers import async_bulk from test_elasticsearch.test_dsl.test_integration.test_data import DATA, GIT_INDEX diff --git a/examples/dsl/async/sparse_vectors.py b/examples/dsl/async/sparse_vectors.py index 86d99bfff..a8abddbda 100644 --- a/examples/dsl/async/sparse_vectors.py +++ b/examples/dsl/async/sparse_vectors.py @@ -67,7 +67,7 @@ from typing import Any, Dict, List, Optional from urllib.request import urlopen -import nltk # type: ignore +import nltk from tqdm import tqdm from elasticsearch.dsl import ( diff --git a/examples/dsl/async/vectors.py b/examples/dsl/async/vectors.py index 62fbfe3f5..5f2a73db6 100644 --- a/examples/dsl/async/vectors.py +++ b/examples/dsl/async/vectors.py @@ -51,7 +51,7 @@ from typing import Any, List, Optional, cast from urllib.request import urlopen -import nltk # type: ignore +import nltk from sentence_transformers import SentenceTransformer from tqdm import tqdm diff --git a/examples/dsl/composite_agg.py b/examples/dsl/composite_agg.py index 6710222b8..56f7ae7a6 100644 --- a/examples/dsl/composite_agg.py +++ b/examples/dsl/composite_agg.py @@ -18,10 +18,9 @@ import os from typing import Any, Dict, Iterator, Mapping, Sequence, cast -from elasticsearch.helpers import bulk - from elasticsearch.dsl import Agg, Response, Search, aggs, connections from elasticsearch.dsl.types import CompositeAggregate +from elasticsearch.helpers import bulk from test_elasticsearch.test_dsl.test_integration.test_data import DATA, GIT_INDEX diff --git a/examples/dsl/sparse_vectors.py b/examples/dsl/sparse_vectors.py index 01bb99178..970f4f4eb 100644 --- a/examples/dsl/sparse_vectors.py +++ b/examples/dsl/sparse_vectors.py @@ -66,7 +66,7 @@ from typing import Any, Dict, List, Optional from urllib.request import urlopen -import nltk # type: ignore +import nltk from tqdm import tqdm from elasticsearch.dsl import ( diff --git a/examples/dsl/vectors.py b/examples/dsl/vectors.py index 2567e2889..5e00d5537 100644 --- a/examples/dsl/vectors.py +++ b/examples/dsl/vectors.py @@ -50,7 +50,7 @@ from typing import Any, List, Optional, cast from urllib.request import urlopen -import nltk # type: ignore +import nltk from sentence_transformers import SentenceTransformer from tqdm import tqdm diff --git a/noxfile.py b/noxfile.py index b42ed0d2f..b2f211a13 100644 --- 
a/noxfile.py +++ b/noxfile.py @@ -86,7 +86,13 @@ def lint(session): session.run("python", "-c", "from elasticsearch._otel import OpenTelemetry") session.install( - "flake8", "black~=24.0", "mypy", "isort", "types-requests", "unasync>=0.6.0" + "flake8", + "black~=24.0", + "mypy", + "isort", + "types-requests", + "types-python-dateutil", + "unasync>=0.6.0", ) session.run("isort", "--check", "--profile=black", *SOURCE_FILES) session.run("black", "--check", *SOURCE_FILES) @@ -98,7 +104,14 @@ def lint(session): # Run mypy on the package and then the type examples separately for # the two different mypy use-cases, ourselves and our users. - session.run("mypy", "--strict", "--show-error-codes", "elasticsearch/") + session.run( + "mypy", + "--strict", + "--implicit-reexport", + "--explicit-package-bases", + "--show-error-codes", + "elasticsearch/", + ) session.run( "mypy", "--strict", @@ -111,11 +124,26 @@ def lint(session): "--show-error-codes", "test_elasticsearch/test_types/async_types.py", ) + session.run( + "mypy", + "--strict", + "--implicit-reexport", + "--explicit-package-bases", + "--show-error-codes", + "examples/dsl/", + ) # Make sure we don't require aiohttp to be installed for users to # receive type hint information from mypy. session.run("python", "-m", "pip", "uninstall", "--yes", "aiohttp") - session.run("mypy", "--strict", "--show-error-codes", "elasticsearch/") + session.run( + "mypy", + "--strict", + "--implicit-reexport", + "--explicit-package-bases", + "--show-error-codes", + "elasticsearch/", + ) session.run( "mypy", "--strict", diff --git a/setup.cfg b/setup.cfg index 403f26a50..4d1925616 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,2 @@ [flake8] -ignore = E203, E266, E501, W503 +ignore = E203, E266, E501, W503, E704, E741 diff --git a/test_elasticsearch/test_dsl/conftest.py b/test_elasticsearch/test_dsl/conftest.py index f1d865761..2e5fa91af 100644 --- a/test_elasticsearch/test_dsl/conftest.py +++ b/test_elasticsearch/test_dsl/conftest.py @@ -27,15 +27,15 @@ import pytest_asyncio from elastic_transport import ObjectApiResponse -from elasticsearch import AsyncElasticsearch, Elasticsearch -from elasticsearch.exceptions import ConnectionError -from elasticsearch.helpers import bulk from pytest import fixture, skip +from elasticsearch import AsyncElasticsearch, Elasticsearch from elasticsearch.dsl import Search from elasticsearch.dsl.async_connections import add_connection as add_async_connection from elasticsearch.dsl.async_connections import connections as async_connections from elasticsearch.dsl.connections import add_connection, connections +from elasticsearch.exceptions import ConnectionError +from elasticsearch.helpers import bulk from .test_integration._async import test_document as async_document from .test_integration._sync import test_document as sync_document diff --git a/test_elasticsearch/test_dsl/test_connections.py b/test_elasticsearch/test_dsl/test_connections.py index 96706d298..dcaa59a98 100644 --- a/test_elasticsearch/test_dsl/test_connections.py +++ b/test_elasticsearch/test_dsl/test_connections.py @@ -17,9 +17,9 @@ from typing import Any, List -from elasticsearch import Elasticsearch from pytest import raises +from elasticsearch import Elasticsearch from elasticsearch.dsl import connections, serializer diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_analysis.py b/test_elasticsearch/test_dsl/test_integration/_async/test_analysis.py index 1feae56cf..00598d4d5 100644 --- 
a/test_elasticsearch/test_dsl/test_integration/_async/test_analysis.py +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_analysis.py @@ -16,8 +16,8 @@ # under the License. import pytest -from elasticsearch import AsyncElasticsearch +from elasticsearch import AsyncElasticsearch from elasticsearch.dsl import analyzer, token_filter, tokenizer diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_document.py b/test_elasticsearch/test_dsl/test_integration/_async/test_document.py index 83b683e1e..e72955a0a 100644 --- a/test_elasticsearch/test_dsl/test_integration/_async/test_document.py +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_document.py @@ -26,11 +26,10 @@ from typing import TYPE_CHECKING, Any, AsyncIterator, Dict, List, Tuple, Union import pytest -from elasticsearch import AsyncElasticsearch, ConflictError, NotFoundError -from elasticsearch.helpers.errors import BulkIndexError from pytest import raises from pytz import timezone +from elasticsearch import AsyncElasticsearch, ConflictError, NotFoundError from elasticsearch.dsl import ( AsyncDocument, AsyncSearch, @@ -54,6 +53,7 @@ mapped_field, ) from elasticsearch.dsl.utils import AttrList +from elasticsearch.helpers.errors import BulkIndexError snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"]) diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py b/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py index 5efc7033e..276fd0d1c 100644 --- a/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py @@ -19,8 +19,8 @@ from typing import Tuple, Type import pytest -from elasticsearch import AsyncElasticsearch +from elasticsearch import AsyncElasticsearch from elasticsearch.dsl import A, AsyncDocument, AsyncSearch, Boolean, Date, Keyword from elasticsearch.dsl.faceted_search import ( AsyncFacetedSearch, diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_index.py b/test_elasticsearch/test_dsl/test_integration/_async/test_index.py index 10c426e5d..e150d1e59 100644 --- a/test_elasticsearch/test_dsl/test_integration/_async/test_index.py +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_index.py @@ -16,8 +16,8 @@ # under the License. import pytest -from elasticsearch import AsyncElasticsearch +from elasticsearch import AsyncElasticsearch from elasticsearch.dsl import ( AsyncComposableIndexTemplate, AsyncDocument, diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_mapping.py b/test_elasticsearch/test_dsl/test_integration/_async/test_mapping.py index 3f860ba59..f370c89c4 100644 --- a/test_elasticsearch/test_dsl/test_integration/_async/test_mapping.py +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_mapping.py @@ -16,9 +16,9 @@ # under the License. 
import pytest -from elasticsearch import AsyncElasticsearch from pytest import raises +from elasticsearch import AsyncElasticsearch from elasticsearch.dsl import AsyncMapping, analysis, exceptions diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_search.py b/test_elasticsearch/test_dsl/test_integration/_async/test_search.py index 627656dfd..a63f6746a 100644 --- a/test_elasticsearch/test_dsl/test_integration/_async/test_search.py +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_search.py @@ -17,9 +17,9 @@ import pytest -from elasticsearch import ApiError, AsyncElasticsearch from pytest import raises +from elasticsearch import ApiError, AsyncElasticsearch from elasticsearch.dsl import ( AsyncDocument, AsyncMultiSearch, diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_update_by_query.py b/test_elasticsearch/test_dsl/test_integration/_async/test_update_by_query.py index 1fbf9d0e9..b051d284a 100644 --- a/test_elasticsearch/test_dsl/test_integration/_async/test_update_by_query.py +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_update_by_query.py @@ -16,8 +16,8 @@ # under the License. import pytest -from elasticsearch import AsyncElasticsearch +from elasticsearch import AsyncElasticsearch from elasticsearch.dsl import AsyncUpdateByQuery from elasticsearch.dsl.search import Q diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_analysis.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_analysis.py index a12756c62..87e5350ba 100644 --- a/test_elasticsearch/test_dsl/test_integration/_sync/test_analysis.py +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_analysis.py @@ -16,8 +16,8 @@ # under the License. import pytest -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch from elasticsearch.dsl import analyzer, token_filter, tokenizer diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py index 08f983b6e..13b60f71b 100644 --- a/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py @@ -26,11 +26,10 @@ from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Tuple, Union import pytest -from elasticsearch import ConflictError, Elasticsearch, NotFoundError -from elasticsearch.helpers.errors import BulkIndexError from pytest import raises from pytz import timezone +from elasticsearch import ConflictError, Elasticsearch, NotFoundError from elasticsearch.dsl import ( Binary, Boolean, @@ -54,6 +53,7 @@ mapped_field, ) from elasticsearch.dsl.utils import AttrList +from elasticsearch.helpers.errors import BulkIndexError snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"]) diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py index 114800644..8d9baa255 100644 --- a/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py @@ -19,8 +19,8 @@ from typing import Tuple, Type import pytest -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch from elasticsearch.dsl import A, Boolean, Date, Document, Keyword, Search from elasticsearch.dsl.faceted_search import ( DateHistogramFacet, diff --git 
a/test_elasticsearch/test_dsl/test_integration/_sync/test_index.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_index.py index 7509f0b0f..51a7dc40e 100644 --- a/test_elasticsearch/test_dsl/test_integration/_sync/test_index.py +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_index.py @@ -16,8 +16,8 @@ # under the License. import pytest -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch from elasticsearch.dsl import ( ComposableIndexTemplate, Date, diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_mapping.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_mapping.py index 270e79a5e..3ce1737db 100644 --- a/test_elasticsearch/test_dsl/test_integration/_sync/test_mapping.py +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_mapping.py @@ -16,9 +16,9 @@ # under the License. import pytest -from elasticsearch import Elasticsearch from pytest import raises +from elasticsearch import Elasticsearch from elasticsearch.dsl import Mapping, analysis, exceptions diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_search.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_search.py index 1ce578fa5..54060d311 100644 --- a/test_elasticsearch/test_dsl/test_integration/_sync/test_search.py +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_search.py @@ -17,9 +17,9 @@ import pytest -from elasticsearch import ApiError, Elasticsearch from pytest import raises +from elasticsearch import ApiError, Elasticsearch from elasticsearch.dsl import Date, Document, Keyword, MultiSearch, Q, Search, Text from elasticsearch.dsl.response import aggs diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_update_by_query.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_update_by_query.py index f16505d49..e6c870a5c 100644 --- a/test_elasticsearch/test_dsl/test_integration/_sync/test_update_by_query.py +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_update_by_query.py @@ -16,8 +16,8 @@ # under the License. import pytest -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch from elasticsearch.dsl import UpdateByQuery from elasticsearch.dsl.search import Q diff --git a/test_elasticsearch/test_dsl/test_integration/test_count.py b/test_elasticsearch/test_dsl/test_integration/test_count.py index 5d52607bc..583a09dd2 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_count.py +++ b/test_elasticsearch/test_dsl/test_integration/test_count.py @@ -18,7 +18,6 @@ from typing import Any from elasticsearch import Elasticsearch - from elasticsearch.dsl.search import Q, Search diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_alias_migration.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_alias_migration.py index dae4c973f..d2b4294a4 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_alias_migration.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_alias_migration.py @@ -16,6 +16,7 @@ # under the License. 
import pytest + from elasticsearch import AsyncElasticsearch from ..async_examples import alias_migration diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_completion.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_completion.py index e9716c1d2..13e73e14a 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_completion.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_completion.py @@ -16,6 +16,7 @@ # under the License. import pytest + from elasticsearch import AsyncElasticsearch from ..async_examples.completion import Person diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_composite_aggs.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_composite_aggs.py index 4bb4e68a3..2d3ab2df7 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_composite_aggs.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_composite_aggs.py @@ -16,8 +16,8 @@ # under the License. import pytest -from elasticsearch import AsyncElasticsearch +from elasticsearch import AsyncElasticsearch from elasticsearch.dsl import A, AsyncSearch from ..async_examples.composite_agg import scan_aggs diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_parent_child.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_parent_child.py index 4d8527081..a730c8839 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_parent_child.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_parent_child.py @@ -19,8 +19,8 @@ import pytest import pytest_asyncio -from elasticsearch import AsyncElasticsearch +from elasticsearch import AsyncElasticsearch from elasticsearch.dsl import Q from ..async_examples.parent_child import Answer, Comment, Question, User, setup diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_percolate.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_percolate.py index d1564d94b..cf1721b8e 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_percolate.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_percolate.py @@ -16,6 +16,7 @@ # under the License. 
import pytest + from elasticsearch import AsyncElasticsearch from ..async_examples.percolate import BlogPost, setup diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py index 7d3acdd34..49c2c01da 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py @@ -20,8 +20,8 @@ from unittest import SkipTest import pytest -from elasticsearch import AsyncElasticsearch +from elasticsearch import AsyncElasticsearch from test_elasticsearch.test_dsl.async_sleep import sleep from ..async_examples import vectors diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_alias_migration.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_alias_migration.py index 9a74b699b..9b811b692 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_alias_migration.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_alias_migration.py @@ -16,6 +16,7 @@ # under the License. import pytest + from elasticsearch import Elasticsearch from ..examples import alias_migration diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_completion.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_completion.py index 6dec13e20..472e067ae 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_completion.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_completion.py @@ -16,6 +16,7 @@ # under the License. import pytest + from elasticsearch import Elasticsearch from ..examples.completion import Person diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_composite_aggs.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_composite_aggs.py index f7d519f92..95581a912 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_composite_aggs.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_composite_aggs.py @@ -16,8 +16,8 @@ # under the License. 
import pytest -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch from elasticsearch.dsl import A, Search from ..examples.composite_agg import scan_aggs diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_parent_child.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_parent_child.py index 514f03686..faa1771f9 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_parent_child.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_parent_child.py @@ -18,8 +18,8 @@ from datetime import datetime import pytest -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch from elasticsearch.dsl import Q from ..examples.parent_child import Answer, Comment, Question, User, setup diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_percolate.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_percolate.py index 925d362c2..c8b4d2095 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_percolate.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_percolate.py @@ -16,6 +16,7 @@ # under the License. import pytest + from elasticsearch import Elasticsearch from ..examples.percolate import BlogPost, setup diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py index ff0d0e759..2bfdce8c5 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py @@ -20,8 +20,8 @@ from unittest import SkipTest import pytest -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch from test_elasticsearch.test_dsl.sleep import sleep from ..examples import vectors diff --git a/test_elasticsearch/test_server/test_vectorstore/test_vectorstore.py b/test_elasticsearch/test_server/test_vectorstore/test_vectorstore.py index 3e17442eb..f44334fc4 100644 --- a/test_elasticsearch/test_server/test_vectorstore/test_vectorstore.py +++ b/test_elasticsearch/test_server/test_vectorstore/test_vectorstore.py @@ -485,7 +485,7 @@ def assert_query( ) store.add_texts(texts) - ## without fetch_k parameter + # without fetch_k parameter output = store.search( query="foo", k=3, @@ -551,7 +551,7 @@ def assert_query( ) store.add_texts(texts) - ## with fetch_k parameter + # with fetch_k parameter output = store.search( query="foo", k=3, From 9ddd26dd44fcf74118bcc92f339f64349ff772e9 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Mon, 13 Jan 2025 16:48:35 +0000 Subject: [PATCH 03/11] runasync dsl --- examples/dsl/async/semantic_text.py | 2 +- examples/dsl/async/sparse_vectors.py | 2 +- examples/dsl/async/vectors.py | 2 +- examples/dsl/semantic_text.py | 2 +- examples/dsl/sparse_vectors.py | 2 +- examples/dsl/vectors.py | 2 +- noxfile.py | 2 ++ 7 files changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/dsl/async/semantic_text.py b/examples/dsl/async/semantic_text.py index 0c416067b..426bf1bbe 100644 --- a/examples/dsl/async/semantic_text.py +++ b/examples/dsl/async/semantic_text.py @@ -21,7 +21,7 @@ Requirements: -$ pip install "elasticsearch-dsl[async]" tqdm +$ pip install "elasticsearch[async]" tqdm Before running this example, an ELSER inference endpoint must be created in the Elasticsearch cluster. 
This can be done manually from Kibana, or with the diff --git a/examples/dsl/async/sparse_vectors.py b/examples/dsl/async/sparse_vectors.py index a8abddbda..4c0cfc1ac 100644 --- a/examples/dsl/async/sparse_vectors.py +++ b/examples/dsl/async/sparse_vectors.py @@ -20,7 +20,7 @@ Requirements: -$ pip install nltk tqdm elasticsearch-dsl[async] +$ pip install nltk tqdm elasticsearch[async] Before running this example, the ELSER v2 model must be downloaded and deployed to the Elasticsearch cluster, and an ingest pipeline must be defined. This can diff --git a/examples/dsl/async/vectors.py b/examples/dsl/async/vectors.py index 5f2a73db6..1dc183f53 100644 --- a/examples/dsl/async/vectors.py +++ b/examples/dsl/async/vectors.py @@ -20,7 +20,7 @@ Requirements: -$ pip install nltk sentence_transformers tqdm elasticsearch-dsl[async] +$ pip install nltk sentence_transformers tqdm elasticsearch[async] To run the example: diff --git a/examples/dsl/semantic_text.py b/examples/dsl/semantic_text.py index aff2d8097..8d552a2aa 100644 --- a/examples/dsl/semantic_text.py +++ b/examples/dsl/semantic_text.py @@ -21,7 +21,7 @@ Requirements: -$ pip install "elasticsearch-dsl" tqdm +$ pip install "elasticsearch" tqdm Before running this example, an ELSER inference endpoint must be created in the Elasticsearch cluster. This can be done manually from Kibana, or with the diff --git a/examples/dsl/sparse_vectors.py b/examples/dsl/sparse_vectors.py index 970f4f4eb..c328769eb 100644 --- a/examples/dsl/sparse_vectors.py +++ b/examples/dsl/sparse_vectors.py @@ -20,7 +20,7 @@ Requirements: -$ pip install nltk tqdm elasticsearch-dsl +$ pip install nltk tqdm elasticsearch Before running this example, the ELSER v2 model must be downloaded and deployed to the Elasticsearch cluster, and an ingest pipeline must be defined. 
This can diff --git a/examples/dsl/vectors.py b/examples/dsl/vectors.py index 5e00d5537..b4c700b71 100644 --- a/examples/dsl/vectors.py +++ b/examples/dsl/vectors.py @@ -20,7 +20,7 @@ Requirements: -$ pip install nltk sentence_transformers tqdm elasticsearch-dsl +$ pip install nltk sentence_transformers tqdm elasticsearch To run the example: diff --git a/noxfile.py b/noxfile.py index b2f211a13..dc92de9c1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -70,6 +70,7 @@ def format(session): session.install("black~=24.0", "isort", "flynt", "unasync>=0.6.0") session.run("python", "utils/run-unasync.py") + session.run("python", "utils/run-unasync-dsl.py") session.run("isort", "--profile=black", *SOURCE_FILES) session.run("flynt", *SOURCE_FILES) session.run("black", *SOURCE_FILES) @@ -97,6 +98,7 @@ def lint(session): session.run("isort", "--check", "--profile=black", *SOURCE_FILES) session.run("black", "--check", *SOURCE_FILES) session.run("python", "utils/run-unasync.py", "--check") + session.run("python", "utils/run-unasync-dsl.py", "--check") session.run("flake8", *SOURCE_FILES) session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES) From ab84e4bc9e5b30e90aaf00692e45106d1017372f Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Mon, 13 Jan 2025 17:28:50 +0000 Subject: [PATCH 04/11] dsl generator --- noxfile.py | 5 +- utils/dsl-generator.py | 855 +++++++++++++++++++++++ utils/run-unasync-dsl.py | 151 ++++ utils/templates/aggs.py.tpl | 320 +++++++++ utils/templates/query.py.tpl | 373 ++++++++++ utils/templates/response.__init__.py.tpl | 225 ++++++ utils/templates/types.py.tpl | 107 +++ 7 files changed, 2035 insertions(+), 1 deletion(-) create mode 100644 utils/dsl-generator.py create mode 100644 utils/run-unasync-dsl.py create mode 100644 utils/templates/aggs.py.tpl create mode 100644 utils/templates/query.py.tpl create mode 100644 utils/templates/response.__init__.py.tpl create mode 100644 utils/templates/types.py.tpl diff --git a/noxfile.py b/noxfile.py index dc92de9c1..a2ed2d987 100644 --- a/noxfile.py +++ b/noxfile.py @@ -67,10 +67,13 @@ def test_otel(session): @nox.session() def format(session): - session.install("black~=24.0", "isort", "flynt", "unasync>=0.6.0") + session.install( + "black~=24.0", "isort", "flynt", "unasync>=0.6.0", "jinja2", "elastic-transport" + ) session.run("python", "utils/run-unasync.py") session.run("python", "utils/run-unasync-dsl.py") + session.run("python", "utils/dsl-generator.py", env={"PYTHONPATH": "./"}) session.run("isort", "--profile=black", *SOURCE_FILES) session.run("flynt", *SOURCE_FILES) session.run("black", *SOURCE_FILES) diff --git a/utils/dsl-generator.py b/utils/dsl-generator.py new file mode 100644 index 000000000..cc905705a --- /dev/null +++ b/utils/dsl-generator.py @@ -0,0 +1,855 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +import json +import re +import textwrap +from urllib.error import HTTPError +from urllib.request import urlopen + +from jinja2 import Environment, PackageLoader, select_autoescape + +from elasticsearch import VERSION + +jinja_env = Environment( + loader=PackageLoader("utils"), + autoescape=select_autoescape(), + trim_blocks=True, + lstrip_blocks=True, +) +query_py = jinja_env.get_template("query.py.tpl") +aggs_py = jinja_env.get_template("aggs.py.tpl") +response_init_py = jinja_env.get_template("response.__init__.py.tpl") +types_py = jinja_env.get_template("types.py.tpl") + +# map with name replacements for Elasticsearch attributes +PROP_REPLACEMENTS = {"from": "from_"} + +# map with Elasticsearch type replacements +# keys and values are in given in "{namespace}:{name}" format +TYPE_REPLACEMENTS = { + "_types.query_dsl:DistanceFeatureQuery": "_types.query_dsl:DistanceFeatureQueryBase", +} + +# some aggregation types are complicated to determine from the schema, so they +# have their correct type here +AGG_TYPES = { + "bucket_count_ks_test": "Pipeline", + "bucket_correlation": "Pipeline", + "bucket_sort": "Bucket", + "categorize_text": "Bucket", + "filter": "Bucket", + "moving_avg": "Pipeline", + "variable_width_histogram": "Bucket", +} + + +def property_to_class_name(name): + return "".join([w.title() if w != "ip" else "IP" for w in name.split("_")]) + + +def wrapped_doc(text, width=70, initial_indent="", subsequent_indent=""): + """Formats a docstring as a list of lines of up to the request width.""" + return textwrap.wrap( + text.replace("\n", " "), + width=width, + initial_indent=initial_indent, + subsequent_indent=subsequent_indent, + ) + + +def add_dict_type(type_): + """Add Dict[str, Any] to a Python type hint.""" + if type_.startswith("Union["): + type_ = f"{type_[:-1]}, Dict[str, Any]]" + else: + type_ = f"Union[{type_}, Dict[str, Any]]" + return type_ + + +def add_seq_dict_type(type_): + """Add Sequence[Dict[str, Any]] to a Python type hint.""" + if type_.startswith("Union["): + type_ = f"{type_[:-1]}, Sequence[Dict[str, Any]]]" + else: + type_ = f"Union[{type_}, Sequence[Dict[str, Any]]]" + return type_ + + +def add_not_set(type_): + """Add DefaultType to a Python type hint.""" + if type_.startswith("Union["): + type_ = f'{type_[:-1]}, "DefaultType"]' + else: + type_ = f'Union[{type_}, "DefaultType"]' + return type_ + + +def type_for_types_py(type_): + """Converts a type rendered in a generic way to the format needed in the + types.py module. 
+ """ + type_ = type_.replace('"DefaultType"', "DefaultType") + type_ = type_.replace('"InstrumentedField"', "InstrumentedField") + type_ = re.sub(r'"(function\.[a-zA-Z0-9_]+)"', r"\1", type_) + type_ = re.sub(r'"types\.([a-zA-Z0-9_]+)"', r'"\1"', type_) + type_ = re.sub(r'"(wrappers\.[a-zA-Z0-9_]+)"', r"\1", type_) + return type_ + + +class ElasticsearchSchema: + """Operations related to the Elasticsearch schema.""" + + def __init__(self): + response = None + for branch in [f"{VERSION[0]}.{VERSION[1]}", "main"]: + url = f"https://raw.githubusercontent.com/elastic/elasticsearch-specification/{branch}/output/schema/schema.json" + try: + response = urlopen(url) + print(f"Initializing code generation with '{branch}' specification.") + break + except HTTPError: + continue + if not response: + raise RuntimeError("Could not download Elasticsearch schema") + self.schema = json.loads(response.read()) + + # Interfaces collects interfaces that are seen while traversing the schema. + # Any interfaces collected here are then rendered as Python in the + # types.py module. + self.interfaces = [] + self.response_interfaces = [] + + def find_type(self, name, namespace=None): + for t in self.schema["types"]: + if t["name"]["name"] == name and ( + namespace is None or t["name"]["namespace"] == namespace + ): + return t + + def inherits_from(self, type_, name, namespace=None): + while "inherits" in type_: + type_ = self.find_type( + type_["inherits"]["type"]["name"], + type_["inherits"]["type"]["namespace"], + ) + if type_["name"]["name"] == name and ( + namespace is None or type_["name"]["namespace"] == namespace + ): + return True + return False + + def get_python_type(self, schema_type, for_response=False): + """Obtain Python typing details for a given schema type + + This method returns a tuple. The first element is a string with the + Python type hint. The second element is a dictionary with Python DSL + specific typing details to be stored in the DslBase._param_defs + attribute (or None if the type does not need to be in _param_defs). + + When `for_response` is `False`, any new interfaces that are discovered + are registered to be generated in "request" style, with alternative + Dict type hints and default values. If `for_response` is `True`, + interfaces are generated just with their declared type, without + Dict alternative and without defaults, to help type checkers be more + effective at parsing response expressions. 
+ """ + if schema_type["kind"] == "instance_of": + type_name = schema_type["type"] + if type_name["namespace"] in ["_types", "internal", "_builtins"]: + if type_name["name"] in ["integer", "uint", "long", "ulong"]: + return "int", None + elif type_name["name"] in ["number", "float", "double"]: + return "float", None + elif type_name["name"] == "string": + return "str", None + elif type_name["name"] == "boolean": + return "bool", None + elif type_name["name"] == "binary": + return "bytes", None + elif type_name["name"] == "null": + return "None", None + elif type_name["name"] == "Field": + if for_response: + return "str", None + else: + return 'Union[str, "InstrumentedField"]', None + else: + # not an instance of a native type, so we get the type and try again + return self.get_python_type( + self.find_type(type_name["name"], type_name["namespace"]), + for_response=for_response, + ) + elif ( + type_name["namespace"] == "_types.query_dsl" + and type_name["name"] == "QueryContainer" + ): + # QueryContainer maps to the DSL's Query class + return "Query", {"type": "query"} + elif ( + type_name["namespace"] == "_types.query_dsl" + and type_name["name"] == "FunctionScoreContainer" + ): + # FunctionScoreContainer maps to the DSL's ScoreFunction class + return "ScoreFunction", {"type": "score_function"} + elif ( + type_name["namespace"] == "_types.aggregations" + and type_name["name"] == "Buckets" + ): + if for_response: + return "Union[Sequence[Any], Dict[str, Any]]", None + else: + return "Dict[str, Query]", {"type": "query", "hash": True} + elif ( + type_name["namespace"] == "_types.aggregations" + and type_name["name"] == "CompositeAggregationSource" + ): + # QueryContainer maps to the DSL's Query class + return "Agg[_R]", None + else: + # for any other instances we get the type and recurse + type_ = self.find_type(type_name["name"], type_name["namespace"]) + if type_: + return self.get_python_type(type_, for_response=for_response) + + elif schema_type["kind"] == "type_alias": + # for an alias, we use the aliased type + return self.get_python_type(schema_type["type"], for_response=for_response) + + elif schema_type["kind"] == "array_of": + # for arrays we use Sequence[element_type] + type_, param = self.get_python_type( + schema_type["value"], for_response=for_response + ) + return f"Sequence[{type_}]", {**param, "multi": True} if param else None + + elif schema_type["kind"] == "dictionary_of": + # for dicts we use Mapping[key_type, value_type] + key_type, key_param = self.get_python_type( + schema_type["key"], for_response=for_response + ) + value_type, value_param = self.get_python_type( + schema_type["value"], for_response=for_response + ) + return f"Mapping[{key_type}, {value_type}]", ( + {**value_param, "hash": True} if value_param else None + ) + + elif schema_type["kind"] == "union_of": + if ( + len(schema_type["items"]) == 2 + and schema_type["items"][0]["kind"] == "instance_of" + and schema_type["items"][1]["kind"] == "array_of" + and schema_type["items"][0] == schema_type["items"][1]["value"] + ): + # special kind of unions in the form Union[type, Sequence[type]] + type_, param = self.get_python_type( + schema_type["items"][0], for_response=for_response + ) + if schema_type["items"][0]["type"]["name"] in [ + "CompletionSuggestOption", + "PhraseSuggestOption", + "TermSuggestOption", + ]: + # for suggest types we simplify this type and return just the array form + return ( + f"Sequence[{type_}]", + ({"type": param["type"], "multi": True} if param else None), + ) + else: + # for every 
other types we produce an union with the two alternatives + return ( + f"Union[{type_}, Sequence[{type_}]]", + ({"type": param["type"], "multi": True} if param else None), + ) + elif ( + len(schema_type["items"]) == 2 + and schema_type["items"][0]["kind"] == "instance_of" + and schema_type["items"][1]["kind"] == "instance_of" + and schema_type["items"][0]["type"] + == {"name": "T", "namespace": "_spec_utils.PipeSeparatedFlags"} + and schema_type["items"][1]["type"] + == {"name": "string", "namespace": "_builtins"} + ): + # for now we treat PipeSeparatedFlags as a special case + if "PipeSeparatedFlags" not in self.interfaces: + self.interfaces.append("PipeSeparatedFlags") + return '"types.PipeSeparatedFlags"', None + else: + # generic union type + types = list( + dict.fromkeys( # eliminate duplicates + [ + self.get_python_type(t, for_response=for_response) + for t in schema_type["items"] + ] + ) + ) + return "Union[" + ", ".join([type_ for type_, _ in types]) + "]", None + + elif schema_type["kind"] == "enum": + # enums are mapped to Literal[member, ...] + return ( + "Literal[" + + ", ".join( + [f"\"{member['name']}\"" for member in schema_type["members"]] + ) + + "]", + None, + ) + + elif schema_type["kind"] == "interface": + if schema_type["name"]["namespace"] == "_types.query_dsl": + # handle specific DSL classes explicitly to map to existing + # Python DSL classes + if schema_type["name"]["name"].endswith("RangeQuery"): + return '"wrappers.Range[Any]"', None + elif schema_type["name"]["name"].endswith("ScoreFunction"): + # When dropping Python 3.8, use `removesuffix("Function")` instead + name = schema_type["name"]["name"][:-8] + return f'"function.{name}"', None + elif schema_type["name"]["name"].endswith("DecayFunction"): + return '"function.DecayFunction"', None + elif schema_type["name"]["name"].endswith("Function"): + return f"\"function.{schema_type['name']['name']}\"", None + elif schema_type["name"]["namespace"] == "_types.analysis" and schema_type[ + "name" + ]["name"].endswith("Analyzer"): + # not expanding analyzers at this time, maybe in the future + return "str, Dict[str, Any]", None + + # to handle other interfaces we generate a type of the same name + # and add the interface to the interfaces.py module + if schema_type["name"]["name"] not in self.interfaces: + self.interfaces.append(schema_type["name"]["name"]) + if for_response: + self.response_interfaces.append(schema_type["name"]["name"]) + return f"\"types.{schema_type['name']['name']}\"", None + elif schema_type["kind"] == "user_defined_value": + # user_defined_value maps to Python's Any type + return "Any", None + + raise RuntimeError(f"Cannot find Python type for {schema_type}") + + def add_attribute(self, k, arg, for_types_py=False, for_response=False): + """Add an attribute to the internal representation of a class. + + This method adds the argument `arg` to the data structure for a class + stored in `k`. In particular, the argument is added to the `k["args"]` + list, making sure required arguments are first in the list. If the + argument is of a type that needs Python DSL specific typing details to + be stored in the DslBase._param_defs attribute, then this is added to + `k["params"]`. + + When `for_types_py` is `True`, type hints are formatted in the most + convenient way for the types.py file. When possible, double quotes are + removed from types, and for types that are in the same file the quotes + are kept to prevent forward references, but the "types." namespace is + removed. 
When `for_types_py` is `False`, all non-native types use + quotes and are namespaced. + + When `for_response` is `True`, type hints are not given the optional + dictionary representation, nor the `DefaultType` used for omitted + attributes. + """ + try: + type_, param = self.get_python_type(arg["type"], for_response=for_response) + except RuntimeError: + type_ = "Any" + param = None + if not for_response: + if type_ != "Any": + if 'Sequence["types.' in type_: + type_ = add_seq_dict_type(type_) # interfaces can be given as dicts + elif "types." in type_: + type_ = add_dict_type(type_) # interfaces can be given as dicts + type_ = add_not_set(type_) + if for_types_py: + type_ = type_for_types_py(type_) + required = "(required) " if arg["required"] else "" + server_default = ( + f" Defaults to `{arg['serverDefault']}` if omitted." + if arg.get("serverDefault") + else "" + ) + doc = wrapped_doc( + f":arg {arg['name']}: {required}{arg.get('description', '')}{server_default}", + subsequent_indent=" ", + ) + arg = { + "name": PROP_REPLACEMENTS.get(arg["name"], arg["name"]), + "type": type_, + "doc": doc, + "required": arg["required"], + } + if param is not None: + param = {"name": arg["name"], "param": param} + if arg["required"]: + # insert in the right place so that all required arguments + # appear at the top of the argument list + i = 0 + for i in range(len(k["args"]) + 1): + if i == len(k["args"]): + break + if k["args"][i].get("positional"): + continue + if k["args"][i]["required"] is False: + break + k["args"].insert(i, arg) + else: + k["args"].append(arg) + if param and "params" in k: + k["params"].append(param) + + def add_behaviors(self, type_, k, for_types_py=False, for_response=False): + """Add behaviors reported in the specification of the given type to the + class representation. + """ + if "behaviors" in type_: + for behavior in type_["behaviors"]: + if ( + behavior["type"]["name"] != "AdditionalProperty" + or behavior["type"]["namespace"] != "_spec_utils" + ): + # we do not support this behavior, so we ignore it + continue + key_type, _ = self.get_python_type( + behavior["generics"][0], for_response=for_response + ) + if "InstrumentedField" in key_type: + value_type, _ = self.get_python_type( + behavior["generics"][1], for_response=for_response + ) + if for_types_py: + value_type = value_type.replace('"DefaultType"', "DefaultType") + value_type = value_type.replace( + '"InstrumentedField"', "InstrumentedField" + ) + value_type = re.sub( + r'"(function\.[a-zA-Z0-9_]+)"', r"\1", value_type + ) + value_type = re.sub( + r'"types\.([a-zA-Z0-9_]+)"', r'"\1"', value_type + ) + value_type = re.sub( + r'"(wrappers\.[a-zA-Z0-9_]+)"', r"\1", value_type + ) + k["args"].append( + { + "name": "_field", + "type": add_not_set(key_type), + "doc": [":arg _field: The field to use in this query."], + "required": False, + "positional": True, + } + ) + k["args"].append( + { + "name": "_value", + "type": add_not_set(add_dict_type(value_type)), + "doc": [":arg _value: The query value for the field."], + "required": False, + "positional": True, + } + ) + k["is_single_field"] = True + else: + raise RuntimeError( + f"Non-field AdditionalProperty are not supported for interface {type_['name']['namespace']}:{type_['name']['name']}." + ) + + def property_to_python_class(self, p): + """Return a dictionary with template data necessary to render a schema + property as a Python class. + + Used for "container" sub-classes such as `QueryContainer`, where each + sub-class is represented by a Python DSL class. 
+ + The format is as follows: + + ```python + { + "property_name": "the name of the property", + "name": "the class name to use for the property", + "docstring": "the formatted docstring as a list of strings", + "args": [ # a Python description of each class attribute + "name": "the name of the attribute", + "type": "the Python type hint for the attribute", + "doc": ["formatted lines of documentation to add to class docstring"], + "required": bool, + "positional": bool, + ], + "params": [ + "name": "the attribute name", + "param": "the param dictionary to include in `_param_defs` for the class", + ], # a DSL-specific description of interesting attributes + "is_single_field": bool # True for single-key dicts with field key + "is_multi_field": bool # True for multi-key dicts with field keys + } + ``` + """ + k = { + "property_name": p["name"], + "name": property_to_class_name(p["name"]), + } + k["docstring"] = wrapped_doc(p.get("description") or "") + other_classes = [] + kind = p["type"]["kind"] + if kind == "instance_of": + namespace = p["type"]["type"]["namespace"] + name = p["type"]["type"]["name"] + if f"{namespace}:{name}" in TYPE_REPLACEMENTS: + namespace, name = TYPE_REPLACEMENTS[f"{namespace}:{name}"].split(":") + if name == "QueryContainer" and namespace == "_types.query_dsl": + type_ = { + "kind": "interface", + "properties": [p], + } + else: + type_ = self.find_type(name, namespace) + if p["name"] in AGG_TYPES: + k["parent"] = AGG_TYPES[p["name"]] + + if type_["kind"] == "interface": + # set the correct parent for bucket and pipeline aggregations + if self.inherits_from( + type_, "PipelineAggregationBase", "_types.aggregations" + ): + k["parent"] = "Pipeline" + elif self.inherits_from( + type_, "BucketAggregationBase", "_types.aggregations" + ): + k["parent"] = "Bucket" + + # generate class attributes + k["args"] = [] + k["params"] = [] + self.add_behaviors(type_, k) + while True: + for arg in type_["properties"]: + self.add_attribute(k, arg) + if "inherits" in type_ and "type" in type_["inherits"]: + type_ = self.find_type( + type_["inherits"]["type"]["name"], + type_["inherits"]["type"]["namespace"], + ) + else: + break + + elif type_["kind"] == "type_alias": + if type_["type"]["kind"] == "union_of": + # for unions we create sub-classes + for other in type_["type"]["items"]: + other_class = self.interface_to_python_class( + other["type"]["name"], + other["type"]["namespace"], + for_types_py=False, + ) + other_class["parent"] = k["name"] + other_classes.append(other_class) + else: + raise RuntimeError( + "Cannot generate code for instances of type_alias instances that are not unions." 
+ ) + + else: + raise RuntimeError( + f"Cannot generate code for instances of kind '{type_['kind']}'" + ) + + elif kind == "dictionary_of": + key_type, _ = self.get_python_type(p["type"]["key"]) + if "InstrumentedField" in key_type: + value_type, _ = self.get_python_type(p["type"]["value"]) + if p["type"]["singleKey"]: + # special handling for single-key dicts with field key + k["args"] = [ + { + "name": "_field", + "type": add_not_set(key_type), + "doc": [":arg _field: The field to use in this query."], + "required": False, + "positional": True, + }, + { + "name": "_value", + "type": add_not_set(add_dict_type(value_type)), + "doc": [":arg _value: The query value for the field."], + "required": False, + "positional": True, + }, + ] + k["is_single_field"] = True + else: + # special handling for multi-key dicts with field keys + k["args"] = [ + { + "name": "_fields", + "type": f"Optional[Mapping[{key_type}, {value_type}]]", + "doc": [ + ":arg _fields: A dictionary of fields with their values." + ], + "required": False, + "positional": True, + }, + ] + k["is_multi_field"] = True + else: + raise RuntimeError(f"Cannot generate code for type {p['type']}") + + else: + raise RuntimeError(f"Cannot generate code for type {p['type']}") + return [k] + other_classes + + def interface_to_python_class( + self, + interface, + namespace=None, + *, + for_types_py=True, + for_response=False, + ): + """Return a dictionary with template data necessary to render an + interface a Python class. + + This is used to render interfaces that are referenced by container + classes. The current list of rendered interfaces is passed as a second + argument to allow this method to add more interfaces to it as they are + discovered. + + The returned format is as follows: + + ```python + { + "name": "the class name to use for the interface class", + "parent": "the parent class name", + "args": [ # a Python description of each class attribute + "name": "the name of the attribute", + "type": "the Python type hint for the attribute", + "doc": ["formatted lines of documentation to add to class docstring"], + "required": bool, + "positional": bool, + ], + "buckets_as_dict": "type" # optional, only present in aggregation response + # classes that have buckets that can have a list + # or dict representation + } + ``` + """ + type_ = self.find_type(interface, namespace) + if type_["kind"] not in ["interface", "response"]: + raise RuntimeError(f"Type {interface} is not an interface") + if type_["kind"] == "response": + # we consider responses as interfaces because they also have properties + # but the location of the properties is different + type_ = type_["body"] + k = {"name": interface, "for_response": for_response, "args": []} + k["docstring"] = wrapped_doc(type_.get("description") or "") + self.add_behaviors( + type_, k, for_types_py=for_types_py, for_response=for_response + ) + generics = [] + while True: + for arg in type_["properties"]: + if interface == "ResponseBody" and arg["name"] == "hits": + k["args"].append( + { + "name": "hits", + "type": "Sequence[_R]", + "doc": [":arg hits: search results"], + "required": arg["required"], + } + ) + elif interface == "ResponseBody" and arg["name"] == "aggregations": + # Aggregations are tricky because the DSL client uses a + # flexible representation that is difficult to generate + # from the schema. + # To handle this we let the generator do its work by calling + # `add_attribute()`, but then we save the generated attribute + # apart and replace it with the DSL's `AggResponse` class. 
+ # The generated type is then used in type hints in variables + # and methods of this class. + self.add_attribute( + k, arg, for_types_py=for_types_py, for_response=for_response + ) + k["aggregate_type"] = ( + k["args"][-1]["type"] + .split("Mapping[str, ")[1] + .rsplit("]", 1)[0] + ) + k["args"][-1] = { + "name": "aggregations", + "type": '"AggResponse[_R]"', + "doc": [":arg aggregations: aggregation results"], + "required": arg["required"], + } + elif ( + "name" in type_ + and type_["name"]["name"] == "MultiBucketAggregateBase" + and arg["name"] == "buckets" + ): + # Also during aggregation response generation, the "buckets" + # attribute that many aggregation responses have is very + # complex, supporting over a dozen different aggregation + # types via generics, each in array or object configurations. + # Typing this attribute proved very difficult. A solution + # that worked with mypy and pyright is to type "buckets" + # for the list form, and create a `buckets_as_dict` + # property that is typed appropriately for accessing the + # buckets in dictionary form. + # The generic type is assumed to be the first in the list, + # which is a simplification that should be improved when a + # more complete implementation of generics is added. + if generics[0]["type"]["name"] == "Void": + generic_type = "Any" + else: + _g = self.find_type( + generics[0]["type"]["name"], + generics[0]["type"]["namespace"], + ) + generic_type, _ = self.get_python_type( + _g, for_response=for_response + ) + generic_type = type_for_types_py(generic_type) + k["args"].append( + { + "name": arg["name"], + # for the type we only include the array form, since + # this client does not request the dict form + "type": f"Sequence[{generic_type}]", + "doc": [ + ":arg buckets: (required) the aggregation buckets as a list" + ], + "required": True, + } + ) + k["buckets_as_dict"] = generic_type + else: + if interface == "Hit" and arg["name"].startswith("_"): + # Python DSL removes the undersore prefix from all the + # properties of the hit, so we do the same + arg["name"] = arg["name"][1:] + + self.add_attribute( + k, arg, for_types_py=for_types_py, for_response=for_response + ) + + if "inherits" not in type_ or "type" not in type_["inherits"]: + break + + if "generics" in type_["inherits"]: + # Generics are only supported for certain specific cases at this + # time. Here we just save them so that they can be recalled later + # while traversing over to parent classes to find inherited + # attributes. + for generic_type in type_["inherits"]["generics"]: + generics.append(generic_type) + + type_ = self.find_type( + type_["inherits"]["type"]["name"], + type_["inherits"]["type"]["namespace"], + ) + return k + + +def generate_query_py(schema, filename): + """Generate query.py with all the properties of `QueryContainer` as Python + classes. + """ + classes = [] + query_container = schema.find_type("QueryContainer", "_types.query_dsl") + for p in query_container["properties"]: + classes += schema.property_to_python_class(p) + + with open(filename, "wt") as f: + f.write(query_py.render(classes=classes, parent="Query")) + print(f"Generated {filename}.") + + +def generate_aggs_py(schema, filename): + """Generate aggs.py with all the properties of `AggregationContainer` as + Python classes. 
+ """ + classes = [] + aggs_container = schema.find_type("AggregationContainer", "_types.aggregations") + for p in aggs_container["properties"]: + if "containerProperty" not in p or not p["containerProperty"]: + classes += schema.property_to_python_class(p) + + with open(filename, "wt") as f: + f.write(aggs_py.render(classes=classes, parent="Agg")) + print(f"Generated {filename}.") + + +def generate_response_init_py(schema, filename): + """Generate response/__init__.py with all the response properties + documented and typed. + """ + search_response = schema.interface_to_python_class( + "ResponseBody", + "_global.search", + for_types_py=False, + for_response=True, + ) + ubq_response = schema.interface_to_python_class( + "Response", + "_global.update_by_query", + for_types_py=False, + for_response=True, + ) + with open(filename, "wt") as f: + f.write( + response_init_py.render(response=search_response, ubq_response=ubq_response) + ) + print(f"Generated {filename}.") + + +def generate_types_py(schema, filename): + """Generate types.py""" + classes = {} + for interface in schema.interfaces: + if interface == "PipeSeparatedFlags": + continue # handled as a special case + for_response = interface in schema.response_interfaces + k = schema.interface_to_python_class( + interface, for_types_py=True, for_response=for_response + ) + classes[k["name"]] = k + + # sort classes by being request/response and then by name + sorted_classes = sorted( + list(classes.keys()), + key=lambda i: str(int(i in schema.response_interfaces)) + i, + ) + classes_list = [] + for n in sorted_classes: + k = classes[n] + if k in classes_list: + continue + classes_list.append(k) + + with open(filename, "wt") as f: + f.write(types_py.render(classes=classes_list)) + print(f"Generated {filename}.") + + +if __name__ == "__main__": + schema = ElasticsearchSchema() + generate_query_py(schema, "elasticsearch/dsl/query.py") + generate_aggs_py(schema, "elasticsearch/dsl/aggs.py") + generate_response_init_py(schema, "elasticsearch/dsl/response/__init__.py") + generate_types_py(schema, "elasticsearch/dsl/types.py") diff --git a/utils/run-unasync-dsl.py b/utils/run-unasync-dsl.py new file mode 100644 index 000000000..a3421aede --- /dev/null +++ b/utils/run-unasync-dsl.py @@ -0,0 +1,151 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
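The script that follows drives unasync over the async DSL sources. As a minimal sketch of the effect, assuming a hypothetical module elasticsearch/dsl/_async/example.py (the file name and the Post class are illustrative only, not part of the patch), the conversion rewrites names using the replacement table defined in main(), while unasync itself removes the async/await keywords:

# Hypothetical async source: elasticsearch/dsl/_async/example.py
from elasticsearch.dsl import AsyncDocument

class Post(AsyncDocument):
    class Index:
        name = "posts"

async def store(post: Post) -> None:
    await post.save()

# Generated sync twin at elasticsearch/dsl/_sync/example.py (AsyncDocument is
# mapped to Document via additional_replacements; unasync drops async/await):
#
#     from elasticsearch.dsl import Document
#
#     class Post(Document):
#         class Index:
#             name = "posts"
#
#     def store(post: Post) -> None:
#         post.save()

Running python utils/run-unasync-dsl.py --check performs the same conversion into a temporary _sync_check/ directory and diffs it against the committed sync sources, which is how the lint session verifies that the two trees stay in sync.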
+ +import os +import subprocess +import sys +from glob import glob +from pathlib import Path + +import unasync + + +def main(check=False): + # the list of directories that need to be processed with unasync + # each entry has two paths: + # - the source path with the async sources + # - the destination path where the sync sources should be written + source_dirs = [ + ( + "elasticsearch/dsl/_async/", + "elasticsearch/dsl/_sync/", + ), + ("test_elasticsearch/test_dsl/_async/", "test_elasticsearch/test_dsl/_sync/"), + ( + "test_elasticsearch/test_dsl/test_integration/_async/", + "test_elasticsearch/test_dsl/test_integration/_sync/", + ), + ( + "test_elasticsearch/test_dsl/test_integration/test_examples/_async/", + "test_elasticsearch/test_dsl/test_integration/test_examples/_sync/", + ), + ("examples/dsl/async/", "examples/dsl/"), + ] + + # Unasync all the generated async code + additional_replacements = { + "_async": "_sync", + "AsyncElasticsearch": "Elasticsearch", + "AsyncSearch": "Search", + "AsyncMultiSearch": "MultiSearch", + "AsyncEmptySearch": "EmptySearch", + "AsyncDocument": "Document", + "AsyncIndexMeta": "IndexMeta", + "AsyncIndexTemplate": "IndexTemplate", + "AsyncIndex": "Index", + "AsyncComposableIndexTemplate": "ComposableIndexTemplate", + "AsyncUpdateByQuery": "UpdateByQuery", + "AsyncMapping": "Mapping", + "AsyncFacetedSearch": "FacetedSearch", + "AsyncUsingType": "UsingType", + "async_connections": "connections", + "async_scan": "scan", + "async_simulate": "simulate", + "async_bulk": "bulk", + "async_mock_client": "mock_client", + "async_client": "client", + "async_data_client": "data_client", + "async_write_client": "write_client", + "async_pull_request": "pull_request", + "async_examples": "examples", + "async_sleep": "sleep", + "assert_awaited_once_with": "assert_called_once_with", + "pytest_asyncio": "pytest", + "asynccontextmanager": "contextmanager", + } + rules = [ + unasync.Rule( + fromdir=dir[0], + todir=f"{dir[0]}_sync_check/" if check else dir[1], + additional_replacements=additional_replacements, + ) + for dir in source_dirs + ] + + filepaths = [] + for root, _, filenames in os.walk(Path(__file__).absolute().parent.parent): + if "/site-packages" in root or "/."
in root or "__pycache__" in root: + continue + for filename in filenames: + if filename.rpartition(".")[-1] in ( + "py", + "pyi", + ) and not filename.startswith("utils.py"): + filepaths.append(os.path.join(root, filename)) + + unasync.unasync_files(filepaths, rules) + for dir in source_dirs: + output_dir = f"{dir[0]}_sync_check/" if check else dir[1] + subprocess.check_call(["black", "--target-version=py38", output_dir]) + subprocess.check_call(["isort", output_dir]) + for file in glob("*.py", root_dir=dir[0]): + # remove asyncio from sync files + subprocess.check_call( + ["sed", "-i.bak", "/^import asyncio$/d", f"{output_dir}{file}"] + ) + subprocess.check_call( + [ + "sed", + "-i.bak", + "s/asyncio\\.run(main())/main()/", + f"{output_dir}{file}", + ] + ) + subprocess.check_call( + [ + "sed", + "-i.bak", + "s/elasticsearch\\[async\\]/elasticsearch/", + f"{output_dir}{file}", + ] + ) + subprocess.check_call( + [ + "sed", + "-i.bak", + "s/pytest.mark.asyncio/pytest.mark.sync/", + f"{output_dir}{file}", + ] + ) + subprocess.check_call(["rm", f"{output_dir}{file}.bak"]) + + if check: + # make sure there are no differences between _sync and _sync_check + subprocess.check_call( + [ + "diff", + f"{dir[1]}{file}", + f"{output_dir}{file}", + ] + ) + + if check: + subprocess.check_call(["rm", "-rf", output_dir]) + + +if __name__ == "__main__": + main(check="--check" in sys.argv) diff --git a/utils/templates/aggs.py.tpl b/utils/templates/aggs.py.tpl new file mode 100644 index 000000000..ab2103aa5 --- /dev/null +++ b/utils/templates/aggs.py.tpl @@ -0,0 +1,320 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import collections.abc +from copy import deepcopy +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Dict, + Generic, + Iterable, + Literal, + Mapping, + MutableMapping, + Optional, + Sequence, + Union, + cast, +) + +from elastic_transport.client_utils import DEFAULT + +from .query import Query +from .response.aggs import AggResponse, BucketData, FieldBucketData, TopHitsData +from .utils import _R, AttrDict, DslBase + +if TYPE_CHECKING: + from elastic_transport.client_utils import DefaultType + from . import types + from .document_base import InstrumentedField + from .search_base import SearchBase + + +def A( + name_or_agg: Union[MutableMapping[str, Any], "Agg[_R]", str], + filter: Optional[Union[str, "Query"]] = None, + **params: Any, +) -> "Agg[_R]": + if filter is not None: + if name_or_agg != "filter": + raise ValueError( + "Aggregation %r doesn't accept positional argument 'filter'." 
+ % name_or_agg + ) + params["filter"] = filter + + # {"terms": {"field": "tags"}, "aggs": {...}} + if isinstance(name_or_agg, collections.abc.MutableMapping): + if params: + raise ValueError("A() cannot accept parameters when passing in a dict.") + # copy to avoid modifying in-place + agg = deepcopy(name_or_agg) + # pop out nested aggs + aggs = agg.pop("aggs", None) + # pop out meta data + meta = agg.pop("meta", None) + # should be {"terms": {"field": "tags"}} + if len(agg) != 1: + raise ValueError( + 'A() can only accept dict with an aggregation ({"terms": {...}}). ' + "Instead it got (%r)" % name_or_agg + ) + agg_type, params = agg.popitem() + if aggs: + params = params.copy() + params["aggs"] = aggs + if meta: + params = params.copy() + params["meta"] = meta + return Agg[_R].get_dsl_class(agg_type)(_expand__to_dot=False, **params) + + # Terms(...) just return the nested agg + elif isinstance(name_or_agg, Agg): + if params: + raise ValueError( + "A() cannot accept parameters when passing in an Agg object." + ) + return name_or_agg + + # "terms", field="tags" + return Agg[_R].get_dsl_class(name_or_agg)(**params) + + +class Agg(DslBase, Generic[_R]): + _type_name = "agg" + _type_shortcut = staticmethod(A) + name = "" + + def __contains__(self, key: str) -> bool: + return False + + def to_dict(self) -> Dict[str, Any]: + d = super().to_dict() + if isinstance(d[self.name], dict): + n = cast(Dict[str, Any], d[self.name]) + if "meta" in n: + d["meta"] = n.pop("meta") + return d + + def result(self, search: "SearchBase[_R]", data: Dict[str, Any]) -> AttrDict[Any]: + return AggResponse[_R](self, search, data) + + +class AggBase(Generic[_R]): + aggs: Dict[str, Agg[_R]] + _base: Agg[_R] + _params: Dict[str, Any] + _param_defs: ClassVar[Dict[str, Any]] = { + "aggs": {"type": "agg", "hash": True}, + } + + def __contains__(self, key: str) -> bool: + return key in self._params.get("aggs", {}) + + def __getitem__(self, agg_name: str) -> Agg[_R]: + agg = cast( + Agg[_R], self._params.setdefault("aggs", {})[agg_name] + ) # propagate KeyError + + # make sure we're not mutating a shared state - whenever accessing a + # bucket, return a shallow copy of it to be safe + if isinstance(agg, Bucket): + agg = A(agg.name, **agg._params) + # be sure to store the copy so any modifications to it will affect us + self._params["aggs"][agg_name] = agg + + return agg + + def __setitem__(self, agg_name: str, agg: Agg[_R]) -> None: + self.aggs[agg_name] = A(agg) + + def __iter__(self) -> Iterable[str]: + return iter(self.aggs) + + def _agg( + self, + bucket: bool, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> Agg[_R]: + agg = self[name] = A(agg_type, *args, **params) + + # For chaining - when creating new buckets return them... 
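# Illustrative usage (not part of the generated template): these chaining rules
# mirror the usual DSL pattern, e.g.
#
#     s = Search(index="git")
#     s.aggs.bucket("per_tag", "terms", field="tags") \
#         .metric("max_lines", "max", field="lines")
#
# bucket() returns the newly created bucket, so "max_lines" is nested under
# "per_tag", while metric() returns self._base, so successive metric() calls
# add sibling metrics at the same level.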
+ if bucket: + return agg + # otherwise return self._base so we can keep chaining + else: + return self._base + + def metric( + self, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> Agg[_R]: + return self._agg(False, name, agg_type, *args, **params) + + def bucket( + self, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> "Bucket[_R]": + return cast("Bucket[_R]", self._agg(True, name, agg_type, *args, **params)) + + def pipeline( + self, + name: str, + agg_type: Union[Dict[str, Any], Agg[_R], str], + *args: Any, + **params: Any, + ) -> "Pipeline[_R]": + return cast("Pipeline[_R]", self._agg(False, name, agg_type, *args, **params)) + + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return BucketData(self, search, data) # type: ignore + + +class Bucket(AggBase[_R], Agg[_R]): + def __init__(self, **params: Any): + super().__init__(**params) + # remember self for chaining + self._base = self + + def to_dict(self) -> Dict[str, Any]: + d = super(AggBase, self).to_dict() + if isinstance(d[self.name], dict): + n = cast(AttrDict[Any], d[self.name]) + if "aggs" in n: + d["aggs"] = n.pop("aggs") + return d + + +class Pipeline(Agg[_R]): + pass + + +{% for k in classes %} +class {{ k.name }}({{ k.parent if k.parent else parent }}[_R]): + """ + {% for line in k.docstring %} + {{ line }} + {% endfor %} + {% if k.args %} + {% if k.docstring %} + + {% endif %} + {% for kwarg in k.args %} + {% for line in kwarg.doc %} + {{ line }} + {% endfor %} + {% endfor %} + {% endif %} + """ + {% if k.property_name %} + name = "{{ k.property_name }}" + {% endif %} + {% if k.params %} + _param_defs = { + {% for param in k.params %} + "{{ param.name }}": {{ param.param }}, + {% endfor %} + {% if k.name == "Filter" or k.name == "Filters" or k.name == "Composite" %} + {# Some #} + "aggs": {"type": "agg", "hash": True}, + {% endif %} + } + {% endif %} + + def __init__( + self, + {% if k.args | length != 1 %} + {% for arg in k.args %} + {% if arg.positional %} + {{ arg.name }}: {{ arg.type }} = DEFAULT, + {% endif %} + {% endfor %} + {% if k.args and not k.args[-1].positional %} + *, + {% endif %} + {% for arg in k.args %} + {% if not arg.positional %} + {{ arg.name }}: {{ arg.type }} = DEFAULT, + {% endif %} + {% endfor %} + {% else %} + {# when we have just one argument, we allow it as positional or keyword #} + {% for arg in k.args %} + {{ arg.name }}: {{ arg.type }} = DEFAULT, + {% endfor %} + {% endif %} + **kwargs: Any + ): + {% if k.name == "FunctionScore" %} + {# continuation of the FunctionScore shortcut property support from above #} + if functions is DEFAULT: + functions = [] + for name in ScoreFunction._classes: + if name in kwargs: + functions.append({name: kwargs.pop(name)}) # type: ignore + {% elif k.is_single_field %} + if _field is not DEFAULT: + kwargs[str(_field)] = _value + {% elif k.is_multi_field %} + if _fields is not DEFAULT: + for field, value in _fields.items(): + kwargs[str(field)] = value + {% endif %} + super().__init__( + {% for arg in k.args %} + {% if not arg.positional %} + {{ arg.name }}={{ arg.name }}, + {% endif %} + {% endfor %} + **kwargs + ) + + {# what follows is a set of Pythonic enhancements to some of the query classes + which are outside the scope of the code generator #} + {% if k.name == "Filter" %} + def to_dict(self) -> Dict[str, Any]: + d = super().to_dict() + if isinstance(d[self.name], dict): + n = cast(AttrDict[Any], d[self.name]) + 
n.update(n.pop("filter", {})) + return d + + {% elif k.name == "Histogram" or k.name == "DateHistogram" or k.name == "AutoDateHistogram" or k.name == "VariableWidthHistogram" %} + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return FieldBucketData(self, search, data) + + {% elif k.name == "Terms" %} + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return FieldBucketData(self, search, data) + + {% elif k.name == "TopHits" %} + def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: + return TopHitsData(self, search, data) + + {% endif %} +{% endfor %} diff --git a/utils/templates/query.py.tpl b/utils/templates/query.py.tpl new file mode 100644 index 000000000..dd26287cf --- /dev/null +++ b/utils/templates/query.py.tpl @@ -0,0 +1,373 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import collections.abc +from copy import deepcopy +from itertools import chain +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Dict, + List, + Literal, + Mapping, + MutableMapping, + Optional, + Protocol, + Sequence, + TypeVar, + Union, + cast, + overload, +) + +from elastic_transport.client_utils import DEFAULT + +# 'SF' looks unused but the test suite assumes it's available +# from this module so others are liable to do so as well. +from .function import SF # noqa: F401 +from .function import ScoreFunction +from .utils import DslBase + +if TYPE_CHECKING: + from elastic_transport.client_utils import DefaultType + from . import types, wrappers + from .document_base import InstrumentedField + +_T = TypeVar("_T") +_M = TypeVar("_M", bound=Mapping[str, Any]) + + +class QProxiedProtocol(Protocol[_T]): + _proxied: _T + + +@overload +def Q(name_or_query: MutableMapping[str, _M]) -> "Query": ... + + +@overload +def Q(name_or_query: "Query") -> "Query": ... + + +@overload +def Q(name_or_query: QProxiedProtocol[_T]) -> _T: ... + + +@overload +def Q(name_or_query: str = "match_all", **params: Any) -> "Query": ... + + +def Q( + name_or_query: Union[ + str, + "Query", + QProxiedProtocol[_T], + MutableMapping[str, _M], + ] = "match_all", + **params: Any, +) -> Union["Query", _T]: + # {"match": {"title": "python"}} + if isinstance(name_or_query, collections.abc.MutableMapping): + if params: + raise ValueError("Q() cannot accept parameters when passing in a dict.") + if len(name_or_query) != 1: + raise ValueError( + 'Q() can only accept dict with a single query ({"match": {...}}). ' + "Instead it got (%r)" % name_or_query + ) + name, q_params = deepcopy(name_or_query).popitem() + return Query.get_dsl_class(name)(_expand__to_dot=False, **q_params) + + # MatchAll() + if isinstance(name_or_query, Query): + if params: + raise ValueError( + "Q() cannot accept parameters when passing in a Query object." 
+ ) + return name_or_query + + # s.query = Q('filtered', query=s.query) + if hasattr(name_or_query, "_proxied"): + return cast(QProxiedProtocol[_T], name_or_query)._proxied + + # "match", title="python" + return Query.get_dsl_class(name_or_query)(**params) + + +class Query(DslBase): + _type_name = "query" + _type_shortcut = staticmethod(Q) + name: ClassVar[Optional[str]] = None + + # Add type annotations for methods not defined in every subclass + __ror__: ClassVar[Callable[["Query", "Query"], "Query"]] + __radd__: ClassVar[Callable[["Query", "Query"], "Query"]] + __rand__: ClassVar[Callable[["Query", "Query"], "Query"]] + + def __add__(self, other: "Query") -> "Query": + # make sure we give queries that know how to combine themselves + # preference + if hasattr(other, "__radd__"): + return other.__radd__(self) + return Bool(must=[self, other]) + + def __invert__(self) -> "Query": + return Bool(must_not=[self]) + + def __or__(self, other: "Query") -> "Query": + # make sure we give queries that know how to combine themselves + # preference + if hasattr(other, "__ror__"): + return other.__ror__(self) + return Bool(should=[self, other]) + + def __and__(self, other: "Query") -> "Query": + # make sure we give queries that know how to combine themselves + # preference + if hasattr(other, "__rand__"): + return other.__rand__(self) + return Bool(must=[self, other]) + + +{% for k in classes %} +class {{ k.name }}({{ parent }}): + """ + {% for line in k.docstring %} + {{ line }} + {% endfor %} + {% if k.args %} + {% if k.docstring %} + + {% endif %} + {% for kwarg in k.args %} + {% for line in kwarg.doc %} + {{ line }} + {% endfor %} + {% endfor %} + {% endif %} + """ + name = "{{ k.property_name }}" + {% if k.params %} + _param_defs = { + {% for param in k.params %} + "{{ param.name }}": {{ param.param }}, + {% endfor %} + {% if k.name == "FunctionScore" %} + {# The FunctionScore class implements a custom solution for the `functions` + shortcut property. 
Until the code generator can support shortcut + properties directly that solution is added here #} + "filter": {"type": "query"}, + {% endif %} + } + {% endif %} + + def __init__( + self, + {% for arg in k.args %} + {% if arg.positional %} + {{ arg.name }}: {{ arg.type }} = DEFAULT, + {% endif %} + {% endfor %} + {% if k.args and not k.args[-1].positional %} + *, + {% endif %} + {% for arg in k.args %} + {% if not arg.positional %} + {{ arg.name }}: {{ arg.type }} = DEFAULT, + {% endif %} + {% endfor %} + **kwargs: Any + ): + {% if k.name == "FunctionScore" %} + {# continuation of the FunctionScore shortcut property support from above #} + if functions is DEFAULT: + functions = [] + for name in ScoreFunction._classes: + if name in kwargs: + functions.append({name: kwargs.pop(name)}) # type: ignore + {% elif k.is_single_field %} + if _field is not DEFAULT: + kwargs[str(_field)] = _value + {% elif k.is_multi_field %} + if _fields is not DEFAULT: + for field, value in _fields.items(): + kwargs[str(field)] = value + {% endif %} + super().__init__( + {% for arg in k.args %} + {% if not arg.positional %} + {{ arg.name }}={{ arg.name }}, + {% endif %} + {% endfor %} + **kwargs + ) + + {# what follows is a set of Pythonic enhancements to some of the query classes + which are outside the scope of the code generator #} + {% if k.name == "MatchAll" %} + def __add__(self, other: "Query") -> "Query": + return other._clone() + + __and__ = __rand__ = __radd__ = __add__ + + def __or__(self, other: "Query") -> "MatchAll": + return self + + __ror__ = __or__ + + def __invert__(self) -> "MatchNone": + return MatchNone() + + +EMPTY_QUERY = MatchAll() + + {% elif k.name == "MatchNone" %} + def __add__(self, other: "Query") -> "MatchNone": + return self + + __and__ = __rand__ = __radd__ = __add__ + + def __or__(self, other: "Query") -> "Query": + return other._clone() + + __ror__ = __or__ + + def __invert__(self) -> MatchAll: + return MatchAll() + + {% elif k.name == "Bool" %} + def __add__(self, other: Query) -> "Bool": + q = self._clone() + if isinstance(other, Bool): + q.must += other.must + q.should += other.should + q.must_not += other.must_not + q.filter += other.filter + else: + q.must.append(other) + return q + + __radd__ = __add__ + + def __or__(self, other: Query) -> Query: + for q in (self, other): + if isinstance(q, Bool) and not any( + (q.must, q.must_not, q.filter, getattr(q, "minimum_should_match", None)) + ): + other = self if q is other else other + q = q._clone() + if isinstance(other, Bool) and not any( + ( + other.must, + other.must_not, + other.filter, + getattr(other, "minimum_should_match", None), + ) + ): + q.should.extend(other.should) + else: + q.should.append(other) + return q + + return Bool(should=[self, other]) + + __ror__ = __or__ + + @property + def _min_should_match(self) -> int: + return getattr( + self, + "minimum_should_match", + 0 if not self.should or (self.must or self.filter) else 1, + ) + + def __invert__(self) -> Query: + # Because an empty Bool query is treated like + # MatchAll the inverse should be MatchNone + if not any(chain(self.must, self.filter, self.should, self.must_not)): + return MatchNone() + + negations: List[Query] = [] + for q in chain(self.must, self.filter): + negations.append(~q) + + for q in self.must_not: + negations.append(q) + + if self.should and self._min_should_match: + negations.append(Bool(must_not=self.should[:])) + + if len(negations) == 1: + return negations[0] + return Bool(should=negations) + + def __and__(self, other: Query) -> Query: + 
q = self._clone() + if isinstance(other, Bool): + q.must += other.must + q.must_not += other.must_not + q.filter += other.filter + q.should = [] + + # reset minimum_should_match as it will get calculated below + if "minimum_should_match" in q._params: + del q._params["minimum_should_match"] + + for qx in (self, other): + min_should_match = qx._min_should_match + # TODO: percentages or negative numbers will fail here + # for now we report an error + if not isinstance(min_should_match, int) or min_should_match < 0: + raise ValueError( + "Can only combine queries with positive integer values for minimum_should_match" + ) + # all subqueries are required + if len(qx.should) <= min_should_match: + q.must.extend(qx.should) + # not all of them are required, use it and remember min_should_match + elif not q.should: + q.minimum_should_match = min_should_match + q.should = qx.should + # all queries are optional, just extend should + elif q._min_should_match == 0 and min_should_match == 0: + q.should.extend(qx.should) + # not all are required, add a should list to the must with proper min_should_match + else: + q.must.append( + Bool(should=qx.should, minimum_should_match=min_should_match) + ) + else: + if not (q.must or q.filter) and q.should: + q._params.setdefault("minimum_should_match", 1) + q.must.append(other) + return q + + __rand__ = __and__ + + {% elif k.name == "Terms" %} + def _setattr(self, name: str, value: Any) -> None: + # here we convert any iterables that are not strings to lists + if hasattr(value, "__iter__") and not isinstance(value, (str, list, dict)): + value = list(value) + super()._setattr(name, value) + + {% endif %} + +{% endfor %} diff --git a/utils/templates/response.__init__.py.tpl b/utils/templates/response.__init__.py.tpl new file mode 100644 index 000000000..f9ae5c4ef --- /dev/null +++ b/utils/templates/response.__init__.py.tpl @@ -0,0 +1,225 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Generic, + Iterator, + List, + Mapping, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +from ..utils import _R, AttrDict, AttrList, _wrap +from .hit import Hit, HitMeta + +if TYPE_CHECKING: + from ..aggs import Agg + from ..faceted_search_base import FacetedSearchBase + from ..search_base import Request, SearchBase + from ..update_by_query_base import UpdateByQueryBase + from .. import types + +__all__ = ["Response", "AggResponse", "UpdateByQueryResponse", "Hit", "HitMeta", "AggregateResponseType"] + + +class Response(AttrDict[Any], Generic[_R]): + """An Elasticsearch search response. 
+ + {% for arg in response.args %} + {% for line in arg.doc %} + {{ line }} + {% endfor %} + {% endfor %} + """ + _search: "SearchBase[_R]" + _faceted_search: "FacetedSearchBase[_R]" + _doc_class: Optional[_R] + _hits: List[_R] + + {% for arg in response.args %} + {% if arg.name not in ["hits", "aggregations"] %} + {{ arg.name }}: {{ arg.type }} + {% endif %} + {% endfor %} + + def __init__( + self, + search: "Request[_R]", + response: Dict[str, Any], + doc_class: Optional[_R] = None, + ): + super(AttrDict, self).__setattr__("_search", search) + super(AttrDict, self).__setattr__("_doc_class", doc_class) + super().__init__(response) + + def __iter__(self) -> Iterator[_R]: # type: ignore[override] + return iter(self.hits) + + def __getitem__(self, key: Union[slice, int, str]) -> Any: + if isinstance(key, (slice, int)): + # for slicing etc + return self.hits[key] + return super().__getitem__(key) + + def __nonzero__(self) -> bool: + return bool(self.hits) + + __bool__ = __nonzero__ + + def __repr__(self) -> str: + return "" % (self.hits or self.aggregations) + + def __len__(self) -> int: + return len(self.hits) + + def __getstate__(self) -> Tuple[Dict[str, Any], "Request[_R]", Optional[_R]]: # type: ignore[override] + return self._d_, self._search, self._doc_class + + def __setstate__( + self, state: Tuple[Dict[str, Any], "Request[_R]", Optional[_R]] # type: ignore[override] + ) -> None: + super(AttrDict, self).__setattr__("_d_", state[0]) + super(AttrDict, self).__setattr__("_search", state[1]) + super(AttrDict, self).__setattr__("_doc_class", state[2]) + + def success(self) -> bool: + return self._shards.total == self._shards.successful and not self.timed_out + + @property + def hits(self) -> List[_R]: + if not hasattr(self, "_hits"): + h = cast(AttrDict[Any], self._d_["hits"]) + + try: + hits = AttrList(list(map(self._search._get_result, h["hits"]))) + except AttributeError as e: + # avoid raising AttributeError since it will be hidden by the property + raise TypeError("Could not parse hits.", e) + + # avoid assigning _hits into self._d_ + super(AttrDict, self).__setattr__("_hits", hits) + for k in h: + setattr(self._hits, k, _wrap(h[k])) + return self._hits + + @property + def aggregations(self) -> "AggResponse[_R]": + return self.aggs + + @property + def aggs(self) -> "AggResponse[_R]": + if not hasattr(self, "_aggs"): + aggs = AggResponse[_R]( + cast("Agg[_R]", self._search.aggs), + self._search, + cast(Dict[str, Any], self._d_.get("aggregations", {})), + ) + + # avoid assigning _aggs into self._d_ + super(AttrDict, self).__setattr__("_aggs", aggs) + return cast("AggResponse[_R]", self._aggs) + + def search_after(self) -> "SearchBase[_R]": + """ + Return a ``Search`` instance that retrieves the next page of results. + + This method provides an easy way to paginate a long list of results using + the ``search_after`` option. For example:: + + page_size = 20 + s = Search()[:page_size].sort("date") + + while True: + # get a page of results + r = await s.execute() + + # do something with this page of results + + # exit the loop if we reached the end + if len(r.hits) < page_size: + break + + # get a search object with the next page of results + s = r.search_after() + + Note that the ``search_after`` option requires the search to have an + explicit ``sort`` order. 
+ """ + if len(self.hits) == 0: + raise ValueError("Cannot use search_after when there are no search results") + if not hasattr(self.hits[-1].meta, "sort"): # type: ignore + raise ValueError("Cannot use search_after when results are not sorted") + return self._search.extra(search_after=self.hits[-1].meta.sort) # type: ignore + + +AggregateResponseType = {{ response["aggregate_type"] }} + + +class AggResponse(AttrDict[Any], Generic[_R]): + """An Elasticsearch aggregation response.""" + _meta: Dict[str, Any] + + def __init__(self, aggs: "Agg[_R]", search: "Request[_R]", data: Dict[str, Any]): + super(AttrDict, self).__setattr__("_meta", {"search": search, "aggs": aggs}) + super().__init__(data) + + def __getitem__(self, attr_name: str) -> AggregateResponseType: + if attr_name in self._meta["aggs"]: + # don't do self._meta['aggs'][attr_name] to avoid copying + agg = self._meta["aggs"].aggs[attr_name] + return cast(AggregateResponseType, agg.result(self._meta["search"], self._d_[attr_name])) + return super().__getitem__(attr_name) # type: ignore + + def __iter__(self) -> Iterator[AggregateResponseType]: # type: ignore[override] + for name in self._meta["aggs"]: + yield self[name] + + +class UpdateByQueryResponse(AttrDict[Any], Generic[_R]): + """An Elasticsearch update by query response. + + {% for arg in ubq_response.args %} + {% for line in arg.doc %} + {{ line }} + {% endfor %} + {% endfor %} + """ + _search: "UpdateByQueryBase[_R]" + + {% for arg in ubq_response.args %} + {{ arg.name }}: {{ arg.type }} + {% endfor %} + + def __init__( + self, + search: "Request[_R]", + response: Dict[str, Any], + doc_class: Optional[_R] = None, + ): + super(AttrDict, self).__setattr__("_search", search) + super(AttrDict, self).__setattr__("_doc_class", doc_class) + super().__init__(response) + + def success(self) -> bool: + return not self.timed_out and not self.failures diff --git a/utils/templates/types.py.tpl b/utils/templates/types.py.tpl new file mode 100644 index 000000000..776b8df4f --- /dev/null +++ b/utils/templates/types.py.tpl @@ -0,0 +1,107 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Dict, Literal, Mapping, Sequence, Union + +from elastic_transport.client_utils import DEFAULT, DefaultType + +from . 
import Query +from .document_base import InstrumentedField +from .utils import AttrDict + +PipeSeparatedFlags = str + + +{% for k in classes %} +class {{ k.name }}({{ k.parent if k.parent else "AttrDict[Any]" }}): + {% if k.docstring or k.args %} + """ + {% for line in k.docstring %} + {{ line }} + {% endfor %} + {% if k.args %} + {% if k.docstring %} + + {% endif %} + {% endif %} + {% for arg in k.args %} + {% for line in arg.doc %} + {{ line }} + {% endfor %} + {% endfor %} + """ + {% for arg in k.args %} + {% if arg.name not in ["keys", "items"] %} + {{ arg.name }}: {{ arg.type }} + {% else %} + {{ arg.name }}: {{ arg.type }} # type: ignore[assignment] + {% endif %} + {% endfor %} + {% if not k.for_response %} + + def __init__( + self, + {% for arg in k.args %} + {% if arg.positional %} + {{ arg.name }}: {{ arg.type }} = DEFAULT, + {% endif %} + {% endfor %} + {% if k.args and not k.args[-1].positional %} + *, + {% endif %} + {% for arg in k.args %} + {% if not arg.positional %} + {{ arg.name }}: {{ arg.type }} = DEFAULT, + {% endif %} + {% endfor %} + **kwargs: Any + ): + {% if k.is_single_field %} + if _field is not DEFAULT: + kwargs[str(_field)] = _value + {% elif k.is_multi_field %} + if _fields is not DEFAULT: + for field, value in _fields.items(): + kwargs[str(field)] = value + {% endif %} + {% for arg in k.args %} + {% if not arg.positional %} + if {{ arg.name }} is not DEFAULT: + {% if "InstrumentedField" in arg.type %} + kwargs["{{ arg.name }}"] = str({{ arg.name }}) + {% else %} + kwargs["{{ arg.name }}"] = {{ arg.name }} + {% endif %} + {% endif %} + {% endfor %} + {% if k.parent %} + super().__init__(**kwargs) + {% else %} + super().__init__(kwargs) + {% endif %} + {% endif %} + {% if k.buckets_as_dict %} + + @property + def buckets_as_dict(self) -> Mapping[str, {{ k.buckets_as_dict }}]: + return self.buckets # type: ignore + {% endif %} + {% else %} + pass + {% endif %} + +{% endfor %} From 89372421766f005a7fdfb4729dba69bcc4509544 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Mon, 13 Jan 2025 17:58:50 +0000 Subject: [PATCH 05/11] fix pytest configuration --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 54c0378f1..33abbd5d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -124,7 +124,6 @@ markers = [ "sync: mark a test as performing I/O without asyncio.", ] filterwarnings = [ - "error", "ignore:Legacy index templates are deprecated in favor of composable templates.:elasticsearch.exceptions.ElasticsearchWarning", "ignore:datetime.datetime.utcfromtimestamp() is deprecated and scheduled for removal in a future version..*:DeprecationWarning", "default:enable_cleanup_closed ignored.*:DeprecationWarning", From 836dfba355e867cb86425347448baefec47c86dd Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Tue, 14 Jan 2025 12:27:56 +0000 Subject: [PATCH 06/11] dsl testing fixes --- pyproject.toml | 1 + test_elasticsearch/test_dsl/conftest.py | 73 +++++++++---------------- utils/run-unasync-dsl.py | 2 +- 3 files changed, 27 insertions(+), 49 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 33abbd5d9..0c66e2f50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ dev = [ "aiohttp", "pytest", "pytest-cov", + "pytest-mock", "pytest-asyncio", "coverage", "jinja2", diff --git a/test_elasticsearch/test_dsl/conftest.py b/test_elasticsearch/test_dsl/conftest.py index 2e5fa91af..c102025fb 100644 --- a/test_elasticsearch/test_dsl/conftest.py +++ b/test_elasticsearch/test_dsl/conftest.py @@ -22,7 +22,7 
@@ import time from datetime import datetime from typing import Any, AsyncGenerator, Dict, Generator, Tuple, cast -from unittest import SkipTest, TestCase +from unittest import SkipTest from unittest.mock import AsyncMock, Mock import pytest_asyncio @@ -37,6 +37,7 @@ from elasticsearch.exceptions import ConnectionError from elasticsearch.helpers import bulk +from ..utils import CA_CERTS from .test_integration._async import test_document as async_document from .test_integration._sync import test_document as sync_document from .test_integration.test_data import ( @@ -47,21 +48,21 @@ create_git_index, ) -if "ELASTICSEARCH_URL" in os.environ: - ELASTICSEARCH_URL = os.environ["ELASTICSEARCH_URL"] -else: - ELASTICSEARCH_URL = "http://localhost:9200" - -def get_test_client(wait: bool = True, **kwargs: Any) -> Elasticsearch: +def get_test_client( + elasticsearch_url, wait: bool = True, **kwargs: Any +) -> Elasticsearch: # construct kwargs from the environment kw: Dict[str, Any] = {"request_timeout": 30} + if elasticsearch_url.startswith("https://"): + kw["ca_certs"] = CA_CERTS + if "PYTHON_CONNECTION_CLASS" in os.environ: kw["node_class"] = os.environ["PYTHON_CONNECTION_CLASS"] kw.update(kwargs) - client = Elasticsearch(ELASTICSEARCH_URL, **kw) + client = Elasticsearch(elasticsearch_url, **kw) # wait for yellow status for tries_left in range(100 if wait else 1, 0, -1): @@ -76,15 +77,17 @@ def get_test_client(wait: bool = True, **kwargs: Any) -> Elasticsearch: raise SkipTest("Elasticsearch failed to start.") -async def get_async_test_client(wait: bool = True, **kwargs: Any) -> AsyncElasticsearch: +async def get_async_test_client( + elasticsearch_url, wait: bool = True, **kwargs: Any +) -> AsyncElasticsearch: # construct kwargs from the environment kw: Dict[str, Any] = {"request_timeout": 30} - if "PYTHON_CONNECTION_CLASS" in os.environ: - kw["node_class"] = os.environ["PYTHON_CONNECTION_CLASS"] + if elasticsearch_url.startswith("https://"): + kw["ca_certs"] = CA_CERTS kw.update(kwargs) - client = AsyncElasticsearch(ELASTICSEARCH_URL, **kw) + client = AsyncElasticsearch(elasticsearch_url, **kw) # wait for yellow status for tries_left in range(100 if wait else 1, 0, -1): @@ -100,36 +103,6 @@ async def get_async_test_client(wait: bool = True, **kwargs: Any) -> AsyncElasti raise SkipTest("Elasticsearch failed to start.") -class ElasticsearchTestCase(TestCase): - client: Elasticsearch - - @staticmethod - def _get_client() -> Elasticsearch: - return get_test_client() - - @classmethod - def setup_class(cls) -> None: - cls.client = cls._get_client() - - def teardown_method(self, _: Any) -> None: - # Hidden indices expanded in wildcards in ES 7.7 - expand_wildcards = ["open", "closed"] - if self.es_version() >= (7, 7): - expand_wildcards.append("hidden") - - self.client.indices.delete_data_stream( - name="*", expand_wildcards=expand_wildcards - ) - self.client.indices.delete(index="*", expand_wildcards=expand_wildcards) - self.client.indices.delete_template(name="*") - self.client.indices.delete_index_template(name="*") - - def es_version(self) -> Tuple[int, ...]: - if not hasattr(self, "_es_version"): - self._es_version = _get_version(self.client.info()["version"]["number"]) - return self._es_version - - def _get_version(version_string: str) -> Tuple[int, ...]: if "." 
not in version_string: return () @@ -138,9 +111,11 @@ def _get_version(version_string: str) -> Tuple[int, ...]: @fixture(scope="session") -def client() -> Elasticsearch: +def client(elasticsearch_url) -> Elasticsearch: try: - connection = get_test_client(wait="WAIT_FOR_ES" in os.environ) + connection = get_test_client( + elasticsearch_url, wait="WAIT_FOR_ES" in os.environ + ) add_connection("default", connection) return connection except SkipTest: @@ -148,9 +123,11 @@ def client() -> Elasticsearch: @pytest_asyncio.fixture -async def async_client() -> AsyncGenerator[AsyncElasticsearch, None]: +async def async_client(elasticsearch_url) -> AsyncGenerator[AsyncElasticsearch, None]: try: - connection = await get_async_test_client(wait="WAIT_FOR_ES" in os.environ) + connection = await get_async_test_client( + elasticsearch_url, wait="WAIT_FOR_ES" in os.environ + ) add_async_connection("default", connection) yield connection await connection.close() @@ -224,8 +201,8 @@ def data_client(client: Elasticsearch) -> Generator[Elasticsearch, None, None]: bulk(client, DATA, raise_on_error=True, refresh=True) bulk(client, FLAT_DATA, raise_on_error=True, refresh=True) yield client - client.indices.delete(index="git") - client.indices.delete(index="flat-git") + client.options(ignore_status=404).indices.delete(index="git") + client.options(ignore_status=404).indices.delete(index="flat-git") @pytest_asyncio.fixture diff --git a/utils/run-unasync-dsl.py b/utils/run-unasync-dsl.py index a3421aede..d089f0e3e 100644 --- a/utils/run-unasync-dsl.py +++ b/utils/run-unasync-dsl.py @@ -37,7 +37,7 @@ def main(check=False): ("test_elasticsearch/test_dsl/_async/", "test_elasticsearch/test_dsl/_sync/"), ( "test_elasticsearch/test_dsl/test_integration/_async/", - "test_elasticsearch/test/dsl/test_integration/_sync/", + "test_elasticsearch/test_dsl/test_integration/_sync/", ), ( "test_elasticsearch/test_dsl/test_integration/test_examples/_async/", From 48a6d54812b224f1afbbfa9a345db11820307a74 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Wed, 15 Jan 2025 12:34:17 +0000 Subject: [PATCH 07/11] wipe cluster after dsl tests --- test_elasticsearch/test_dsl/conftest.py | 13 ++++++++----- .../test_integration/_async/test_faceted_search.py | 6 +++--- .../test_integration/_sync/test_faceted_search.py | 6 +++--- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/test_elasticsearch/test_dsl/conftest.py b/test_elasticsearch/test_dsl/conftest.py index c102025fb..5dd83e54c 100644 --- a/test_elasticsearch/test_dsl/conftest.py +++ b/test_elasticsearch/test_dsl/conftest.py @@ -37,7 +37,7 @@ from elasticsearch.exceptions import ConnectionError from elasticsearch.helpers import bulk -from ..utils import CA_CERTS +from ..utils import CA_CERTS, wipe_cluster from .test_integration._async import test_document as async_document from .test_integration._sync import test_document as sync_document from .test_integration.test_data import ( @@ -110,14 +110,16 @@ def _get_version(version_string: str) -> Tuple[int, ...]: return tuple(int(v) if v.isdigit() else 999 for v in version) -@fixture(scope="session") +@fixture def client(elasticsearch_url) -> Elasticsearch: try: connection = get_test_client( elasticsearch_url, wait="WAIT_FOR_ES" in os.environ ) add_connection("default", connection) - return connection + yield connection + wipe_cluster(connection) + connection.close() except SkipTest: skip() @@ -130,12 +132,13 @@ async def async_client(elasticsearch_url) -> AsyncGenerator[AsyncElasticsearch, ) add_async_connection("default", 
connection) yield connection + wipe_cluster(connection) await connection.close() except SkipTest: skip() -@fixture(scope="session") +@fixture def es_version(client: Elasticsearch) -> Generator[Tuple[int, ...], None, None]: info = client.info() yield tuple( @@ -192,7 +195,7 @@ def async_mock_client( async_connections._kwargs = {} -@fixture(scope="session") +@fixture def data_client(client: Elasticsearch) -> Generator[Elasticsearch, None, None]: # create mappings create_git_index(client, "git") diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py b/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py index 276fd0d1c..bb0fd9257 100644 --- a/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py +++ b/test_elasticsearch/test_dsl/test_integration/_async/test_faceted_search.py @@ -58,7 +58,7 @@ class MetricSearch(AsyncFacetedSearch): } -@pytest.fixture(scope="session") +@pytest.fixture def commit_search_cls(es_version: Tuple[int, ...]) -> Type[AsyncFacetedSearch]: if es_version >= (7, 2): interval_kwargs = {"fixed_interval": "1d"} @@ -86,7 +86,7 @@ class CommitSearch(AsyncFacetedSearch): return CommitSearch -@pytest.fixture(scope="session") +@pytest.fixture def repo_search_cls(es_version: Tuple[int, ...]) -> Type[AsyncFacetedSearch]: interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" @@ -107,7 +107,7 @@ def search(self) -> AsyncSearch: return RepoSearch -@pytest.fixture(scope="session") +@pytest.fixture def pr_search_cls(es_version: Tuple[int, ...]) -> Type[AsyncFacetedSearch]: interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py index 8d9baa255..00ce01cc3 100644 --- a/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py +++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_faceted_search.py @@ -58,7 +58,7 @@ class MetricSearch(FacetedSearch): } -@pytest.fixture(scope="session") +@pytest.fixture def commit_search_cls(es_version: Tuple[int, ...]) -> Type[FacetedSearch]: if es_version >= (7, 2): interval_kwargs = {"fixed_interval": "1d"} @@ -86,7 +86,7 @@ class CommitSearch(FacetedSearch): return CommitSearch -@pytest.fixture(scope="session") +@pytest.fixture def repo_search_cls(es_version: Tuple[int, ...]) -> Type[FacetedSearch]: interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" @@ -107,7 +107,7 @@ def search(self) -> Search: return RepoSearch -@pytest.fixture(scope="session") +@pytest.fixture def pr_search_cls(es_version: Tuple[int, ...]) -> Type[FacetedSearch]: interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" From ac47786eda9e337e71955ad8bb537ee8baf15757 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Wed, 15 Jan 2025 15:46:14 +0000 Subject: [PATCH 08/11] remove unused coverage option --- noxfile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index a2ed2d987..d25d4dc2b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -37,7 +37,6 @@ def pytest_argv(): "pytest", "--cov-report=term-missing", "--cov=elasticsearch", - "--cov-config=setup.cfg", f"--junitxml={junit_xml}", "--log-level=DEBUG", "--cache-clear", From 5e744296a8852db28eb6685f12cc2901d3d6d09a Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Thu, 16 Jan 2025 17:02:16 +0000 Subject: [PATCH 09/11] review feedback --- elasticsearch/__init__.py | 4 
+- elasticsearch/_async/helpers.py | 2 +- elasticsearch/_sync/client/utils.py | 2 +- elasticsearch/dsl/__init__.py | 3 - elasticsearch/dsl/_async/document.py | 4 +- elasticsearch/dsl/_sync/document.py | 4 +- elasticsearch/dsl/aggs.py | 2 +- elasticsearch/dsl/analysis.py | 12 ++-- elasticsearch/dsl/connections.py | 8 +-- elasticsearch/dsl/document_base.py | 8 +-- elasticsearch/dsl/faceted_search_base.py | 2 +- elasticsearch/dsl/field.py | 10 +-- elasticsearch/dsl/mapping_base.py | 2 +- elasticsearch/dsl/py.typed | 0 elasticsearch/dsl/query.py | 2 +- elasticsearch/dsl/response/__init__.py | 6 +- elasticsearch/dsl/response/aggs.py | 4 +- elasticsearch/dsl/types.py | 69 ++++++++++--------- elasticsearch/dsl/utils.py | 2 +- elasticsearch/helpers/actions.py | 2 +- examples/dsl/async/sparse_vectors.py | 2 +- examples/dsl/async/vectors.py | 2 +- examples/dsl/sparse_vectors.py | 2 +- examples/dsl/vectors.py | 2 +- noxfile.py | 8 ++- .../test_dsl/test_connections.py | 6 +- utils/templates/aggs.py.tpl | 2 +- utils/templates/query.py.tpl | 2 +- utils/templates/response.__init__.py.tpl | 6 +- utils/templates/types.py.tpl | 2 +- 30 files changed, 91 insertions(+), 91 deletions(-) delete mode 100644 elasticsearch/dsl/py.typed diff --git a/elasticsearch/__init__.py b/elasticsearch/__init__.py index 723b3a2b7..c2277228a 100644 --- a/elasticsearch/__init__.py +++ b/elasticsearch/__init__.py @@ -27,7 +27,7 @@ from ._version import __versionstr__ # Ensure that a compatible version of elastic-transport is installed. -_version_groups = tuple(int(x) for x in re.search(r"^(\d+)\.(\d+)\.(\d+)", _elastic_transport_version).groups()) # type: ignore +_version_groups = tuple(int(x) for x in re.search(r"^(\d+)\.(\d+)\.(\d+)", _elastic_transport_version).groups()) # type: ignore[union-attr] if _version_groups < (8, 0, 0) or _version_groups > (9, 0, 0): raise ImportError( "An incompatible version of elastic-transport is installed. 
Must be between " @@ -35,7 +35,7 @@ "$ python -m pip install 'elastic-transport>=8, <9'" ) -_version_groups = re.search(r"^(\d+)\.(\d+)\.(\d+)", __versionstr__).groups() # type: ignore +_version_groups = re.search(r"^(\d+)\.(\d+)\.(\d+)", __versionstr__).groups() # type: ignore[assignment, union-attr] _major, _minor, _patch = (int(x) for x in _version_groups) VERSION = __version__ = (_major, _minor, _patch) diff --git a/elasticsearch/_async/helpers.py b/elasticsearch/_async/helpers.py index 1bc339917..4c53f0bbe 100644 --- a/elasticsearch/_async/helpers.py +++ b/elasticsearch/_async/helpers.py @@ -257,7 +257,7 @@ async def map_actions() -> AsyncIterable[_TYPE_BULK_ACTION_HEADER_AND_BODY]: ] ok: bool info: Dict[str, Any] - async for data, (ok, info) in azip( # type: ignore + async for data, (ok, info) in azip( # type: ignore[assignment, misc] bulk_data, _process_bulk_chunk( client, diff --git a/elasticsearch/_sync/client/utils.py b/elasticsearch/_sync/client/utils.py index c5ec21dae..9f957987c 100644 --- a/elasticsearch/_sync/client/utils.py +++ b/elasticsearch/_sync/client/utils.py @@ -232,7 +232,7 @@ def host_mapping_to_node_config(host: Mapping[str, Union[str, int]]) -> NodeConf ) options["path_prefix"] = options.pop("url_prefix") - return NodeConfig(**options) # type: ignore + return NodeConfig(**options) # type: ignore[arg-type] def cloud_id_to_node_configs(cloud_id: str) -> List[NodeConfig]: diff --git a/elasticsearch/dsl/__init__.py b/elasticsearch/dsl/__init__.py index a91e84424..860e2b761 100644 --- a/elasticsearch/dsl/__init__.py +++ b/elasticsearch/dsl/__init__.py @@ -104,9 +104,6 @@ from .utils import AttrDict, AttrList, DslBase from .wrappers import Range -VERSION = (8, 17, 1) -__version__ = VERSION -__versionstr__ = ".".join(map(str, VERSION)) __all__ = [ "A", "Agg", diff --git a/elasticsearch/dsl/_async/document.py b/elasticsearch/dsl/_async/document.py index c7ece2dea..4b7654761 100644 --- a/elasticsearch/dsl/_async/document.py +++ b/elasticsearch/dsl/_async/document.py @@ -241,11 +241,11 @@ async def mget( error_ids = [doc["_id"] for doc in error_docs] message = "Required routing not provided for documents %s." message %= ", ".join(error_ids) - raise RequestError(400, message, error_docs) # type: ignore + raise RequestError(400, message, error_docs) # type: ignore[arg-type] if missing_docs: missing_ids = [doc["_id"] for doc in missing_docs] message = f"Documents {', '.join(missing_ids)} not found." - raise NotFoundError(404, message, {"docs": missing_docs}) # type: ignore + raise NotFoundError(404, message, {"docs": missing_docs}) # type: ignore[arg-type] return objs async def delete( diff --git a/elasticsearch/dsl/_sync/document.py b/elasticsearch/dsl/_sync/document.py index 3444563ad..316ece5cb 100644 --- a/elasticsearch/dsl/_sync/document.py +++ b/elasticsearch/dsl/_sync/document.py @@ -235,11 +235,11 @@ def mget( error_ids = [doc["_id"] for doc in error_docs] message = "Required routing not provided for documents %s." message %= ", ".join(error_ids) - raise RequestError(400, message, error_docs) # type: ignore + raise RequestError(400, message, error_docs) # type: ignore[arg-type] if missing_docs: missing_ids = [doc["_id"] for doc in missing_docs] message = f"Documents {', '.join(missing_ids)} not found." 
- raise NotFoundError(404, message, {"docs": missing_docs}) # type: ignore + raise NotFoundError(404, message, {"docs": missing_docs}) # type: ignore[arg-type] return objs def delete( diff --git a/elasticsearch/dsl/aggs.py b/elasticsearch/dsl/aggs.py index 6c51c3ace..ba5150803 100644 --- a/elasticsearch/dsl/aggs.py +++ b/elasticsearch/dsl/aggs.py @@ -193,7 +193,7 @@ def pipeline( return cast("Pipeline[_R]", self._agg(False, name, agg_type, *args, **params)) def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: - return BucketData(self, search, data) # type: ignore + return BucketData(self, search, data) # type: ignore[arg-type] class Bucket(AggBase[_R], Agg[_R]): diff --git a/elasticsearch/dsl/analysis.py b/elasticsearch/dsl/analysis.py index a810064e0..bc5ee6456 100644 --- a/elasticsearch/dsl/analysis.py +++ b/elasticsearch/dsl/analysis.py @@ -37,9 +37,9 @@ def _type_shortcut( return name_or_instance # type: ignore[return-value] if not (type or kwargs): - return cls.get_dsl_class("builtin")(name_or_instance) # type: ignore + return cls.get_dsl_class("builtin")(name_or_instance) # type: ignore[no-any-return, attr-defined] - return cls.get_dsl_class(type, "custom")( # type: ignore + return cls.get_dsl_class(type, "custom")( # type: ignore[no-any-return, attr-defined] name_or_instance, type or "custom", **kwargs ) @@ -54,13 +54,13 @@ def __init__(self, filter_name: str, builtin_type: str = "custom", **kwargs: Any def to_dict(self) -> Dict[str, Any]: # only name to present in lists - return self._name # type: ignore + return self._name # type: ignore[return-value] def get_definition(self) -> Dict[str, Any]: - d = super().to_dict() # type: ignore + d = super().to_dict() # type: ignore[misc] d = d.pop(self.name) d["type"] = self._builtin_type - return d # type: ignore + return d # type: ignore[no-any-return] class CustomAnalysisDefinition(CustomAnalysis): @@ -111,7 +111,7 @@ def __init__(self, name: str): def to_dict(self) -> Dict[str, Any]: # only name to present in lists - return self._name # type: ignore + return self._name # type: ignore[return-value] class Analyzer(AnalysisBase, DslBase): diff --git a/elasticsearch/dsl/connections.py b/elasticsearch/dsl/connections.py index a3d340967..ff3243797 100644 --- a/elasticsearch/dsl/connections.py +++ b/elasticsearch/dsl/connections.py @@ -116,16 +116,16 @@ def get_connection(self, alias: Union[str, _T] = "default") -> _T: raise KeyError(f"There is no connection with alias {alias!r}.") def _with_user_agent(self, conn: _T) -> _T: - from . 
import __versionstr__ # this is here to avoid circular imports + from elasticsearch import ( + __versionstr__, # this is here to avoid circular imports + ) # try to inject our user agent if hasattr(conn, "_headers"): is_frozen = conn._headers.frozen if is_frozen: conn._headers = conn._headers.copy() - conn._headers.update( - {"user-agent": f"elasticsearch-dsl-py/{__versionstr__}"} - ) + conn._headers.update({"user-agent": f"elasticsearch-py/{__versionstr__}"}) if is_frozen: conn._headers.freeze() return conn diff --git a/elasticsearch/dsl/document_base.py b/elasticsearch/dsl/document_base.py index b4f8d67e5..b5e373741 100644 --- a/elasticsearch/dsl/document_base.py +++ b/elasticsearch/dsl/document_base.py @@ -36,7 +36,7 @@ try: from types import UnionType except ImportError: - UnionType = None # type: ignore + UnionType = None # type: ignore[assignment, misc] from typing_extensions import dataclass_transform @@ -81,14 +81,14 @@ def __init__(self, name: str, field: Field): def __getattr__(self, attr: str) -> "InstrumentedField": try: # first let's see if this is an attribute of this object - return super().__getattribute__(attr) # type: ignore + return super().__getattribute__(attr) # type: ignore[no-any-return] except AttributeError: try: # next we see if we have a sub-field with this name return InstrumentedField(f"{self._name}.{attr}", self._field[attr]) except KeyError: # lastly we let the wrapped field resolve this attribute - return getattr(self._field, attr) # type: ignore + return getattr(self._field, attr) # type: ignore[no-any-return] def __pos__(self) -> str: """Return the field name representation for ascending sort order""" @@ -226,7 +226,7 @@ def __init__(self, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]): field_args = [type_] elif type_ in self.type_annotation_map: # use best field type for the type hint provided - field, field_kwargs = self.type_annotation_map[type_] # type: ignore + field, field_kwargs = self.type_annotation_map[type_] # type: ignore[assignment] if field: field_kwargs = { diff --git a/elasticsearch/dsl/faceted_search_base.py b/elasticsearch/dsl/faceted_search_base.py index ee6fed2f9..5caa041bf 100644 --- a/elasticsearch/dsl/faceted_search_base.py +++ b/elasticsearch/dsl/faceted_search_base.py @@ -93,7 +93,7 @@ def add_filter(self, filter_values: List[FilterValueType]) -> Optional[Query]: f |= self.get_value_filter(v) return f - def get_value_filter(self, filter_value: FilterValueType) -> Query: # type: ignore + def get_value_filter(self, filter_value: FilterValueType) -> Query: # type: ignore[empty-body] """ Construct a filter for an individual value """ diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py index 55ab4f7f9..8f9dd2ff7 100644 --- a/elasticsearch/dsl/field.py +++ b/elasticsearch/dsl/field.py @@ -378,7 +378,7 @@ def clean(self, data: Any) -> Optional[bool]: data = self.deserialize(data) if data is None and self._required: raise ValidationException("Value required for this field.") - return data # type: ignore + return data # type: ignore[no-any-return] class Float(Field): @@ -515,12 +515,12 @@ class Percolator(Field): _coerce = True def _deserialize(self, data: Any) -> "Query": - return Q(data) # type: ignore + return Q(data) # type: ignore[no-any-return] def _serialize(self, data: Any) -> Optional[Dict[str, Any]]: if data is None: return None - return data.to_dict() # type: ignore + return data.to_dict() # type: ignore[no-any-return] class RangeField(Field): @@ -530,7 +530,7 @@ class RangeField(Field): def 
_deserialize(self, data: Any) -> Range["_SupportsComparison"]: if isinstance(data, Range): return data - data = {k: self._core_field.deserialize(v) for k, v in data.items()} # type: ignore + data = {k: self._core_field.deserialize(v) for k, v in data.items()} # type: ignore[union-attr] return Range(data) def _serialize(self, data: Any) -> Optional[Dict[str, Any]]: @@ -538,7 +538,7 @@ def _serialize(self, data: Any) -> Optional[Dict[str, Any]]: return None if not isinstance(data, collections.abc.Mapping): data = data.to_dict() - return {k: self._core_field.serialize(v) for k, v in data.items()} # type: ignore + return {k: self._core_field.serialize(v) for k, v in data.items()} # type: ignore[union-attr] class IntegerRange(RangeField): diff --git a/elasticsearch/dsl/mapping_base.py b/elasticsearch/dsl/mapping_base.py index 658cf6cfc..cb8110fd1 100644 --- a/elasticsearch/dsl/mapping_base.py +++ b/elasticsearch/dsl/mapping_base.py @@ -64,7 +64,7 @@ def field(self, name: str, *args: Any, **kwargs: Any) -> Self: def _collect_fields(self) -> Iterator[Field]: """Iterate over all Field objects within, including multi fields.""" - fields = cast(Dict[str, Field], self.properties.to_dict()) # type: ignore + fields = cast(Dict[str, Field], self.properties.to_dict()) # type: ignore[attr-defined] for f in fields.values(): yield f # multi fields diff --git a/elasticsearch/dsl/py.typed b/elasticsearch/dsl/py.typed deleted file mode 100644 index e69de29bb..000000000 diff --git a/elasticsearch/dsl/query.py b/elasticsearch/dsl/query.py index 0a7d288c8..b5808959c 100644 --- a/elasticsearch/dsl/query.py +++ b/elasticsearch/dsl/query.py @@ -637,7 +637,7 @@ def __init__( functions = [] for name in ScoreFunction._classes: if name in kwargs: - functions.append({name: kwargs.pop(name)}) # type: ignore + functions.append({name: kwargs.pop(name)}) # type: ignore[arg-type] super().__init__( boost_mode=boost_mode, functions=functions, diff --git a/elasticsearch/dsl/response/__init__.py b/elasticsearch/dsl/response/__init__.py index eea1b87f9..f6f3d551d 100644 --- a/elasticsearch/dsl/response/__init__.py +++ b/elasticsearch/dsl/response/__init__.py @@ -197,9 +197,9 @@ def search_after(self) -> "SearchBase[_R]": """ if len(self.hits) == 0: raise ValueError("Cannot use search_after when there are no search results") - if not hasattr(self.hits[-1].meta, "sort"): # type: ignore + if not hasattr(self.hits[-1].meta, "sort"): # type: ignore[attr-defined] raise ValueError("Cannot use search_after when results are not sorted") - return self._search.extra(search_after=self.hits[-1].meta.sort) # type: ignore + return self._search.extra(search_after=self.hits[-1].meta.sort) # type: ignore[attr-defined] AggregateResponseType = Union[ @@ -293,7 +293,7 @@ def __getitem__(self, attr_name: str) -> AggregateResponseType: AggregateResponseType, agg.result(self._meta["search"], self._d_[attr_name]), ) - return super().__getitem__(attr_name) # type: ignore + return super().__getitem__(attr_name) # type: ignore[no-any-return] def __iter__(self) -> Iterator[AggregateResponseType]: # type: ignore[override] for name in self._meta["aggs"]: diff --git a/elasticsearch/dsl/response/aggs.py b/elasticsearch/dsl/response/aggs.py index 3525e1f92..8994fa761 100644 --- a/elasticsearch/dsl/response/aggs.py +++ b/elasticsearch/dsl/response/aggs.py @@ -63,7 +63,7 @@ def _wrap_bucket(self, data: Dict[str, Any]) -> Bucket[_R]: ) def __iter__(self) -> Iterator["Agg"]: # type: ignore[override] - return iter(self.buckets) # type: ignore + return 
iter(self.buckets) # type: ignore[arg-type] def __len__(self) -> int: return len(self.buckets) @@ -83,7 +83,7 @@ def buckets(self) -> Union[AttrDict[Any], AttrList[Any]]: if isinstance(bs, list): ret = AttrList(bs, obj_wrapper=self._wrap_bucket) else: - ret = AttrDict[Any]({k: self._wrap_bucket(bs[k]) for k in bs}) # type: ignore + ret = AttrDict[Any]({k: self._wrap_bucket(bs[k]) for k in bs}) # type: ignore[assignment] super(AttrDict, self).__setattr__("_buckets", ret) return self._buckets diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py index 756f6ef2d..ce639c4ed 100644 --- a/elasticsearch/dsl/types.py +++ b/elasticsearch/dsl/types.py @@ -3965,7 +3965,7 @@ class AdjacencyMatrixAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "AdjacencyMatrixBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class AdjacencyMatrixBucket(AttrDict[Any]): @@ -4135,7 +4135,7 @@ class AutoDateHistogramAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "DateHistogramBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class AvgAggregate(AttrDict[Any]): @@ -4352,7 +4352,7 @@ class CompositeAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "CompositeBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class CompositeBucket(AttrDict[Any]): @@ -4390,7 +4390,7 @@ class DateHistogramAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "DateHistogramBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class DateHistogramBucket(AttrDict[Any]): @@ -4420,7 +4420,7 @@ class DateRangeAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "RangeBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class DerivativeAggregate(AttrDict[Any]): @@ -4525,7 +4525,7 @@ class DoubleTermsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "DoubleTermsBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class DoubleTermsBucket(AttrDict[Any]): @@ -4549,7 +4549,7 @@ class ErrorCause(AttrDict[Any]): provided, that depend on the error type. :arg type: (required) The type of error - :arg reason: A human-readable explanation of the error, in english + :arg reason: A human-readable explanation of the error, in English. :arg stack_trace: The server stack trace. Present only if the `error_trace=true` parameter was sent with the request. 
:arg caused_by: @@ -4768,7 +4768,7 @@ class FiltersAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "FiltersBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class FiltersBucket(AttrDict[Any]): @@ -4790,7 +4790,7 @@ class FrequentItemSetsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "FrequentItemSetsBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class FrequentItemSetsBucket(AttrDict[Any]): @@ -4846,7 +4846,7 @@ class GeoDistanceAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "RangeBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class GeoHashGridAggregate(AttrDict[Any]): @@ -4860,7 +4860,7 @@ class GeoHashGridAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "GeoHashGridBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class GeoHashGridBucket(AttrDict[Any]): @@ -4884,7 +4884,7 @@ class GeoHexGridAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "GeoHexGridBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class GeoHexGridBucket(AttrDict[Any]): @@ -4934,7 +4934,7 @@ class GeoTileGridAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "GeoTileGridBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class GeoTileGridBucket(AttrDict[Any]): @@ -4988,7 +4988,7 @@ class HistogramAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "HistogramBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class HistogramBucket(AttrDict[Any]): @@ -5133,7 +5133,7 @@ class IpPrefixAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "IpPrefixBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class IpPrefixBucket(AttrDict[Any]): @@ -5163,7 +5163,7 @@ class IpRangeAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "IpRangeBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class IpRangeBucket(AttrDict[Any]): @@ -5276,7 +5276,7 @@ class LongRareTermsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "LongRareTermsBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class LongRareTermsBucket(AttrDict[Any]): @@ -5309,7 +5309,7 @@ class LongTermsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "LongTermsBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class LongTermsBucket(AttrDict[Any]): @@ -5427,7 +5427,7 @@ class MultiTermsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "MultiTermsBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class MultiTermsBucket(AttrDict[Any]): @@ -5595,7 +5595,7 @@ class RangeAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "RangeBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class RangeBucket(AttrDict[Any]): @@ -5722,11 +5722,12 @@ class ShardProfile(AttrDict[Any]): class ShardStatistics(AttrDict[Any]): """ - :arg failed: 
(required) - :arg successful: (required) Indicates how many shards have - successfully run the search. - :arg total: (required) Indicates how many shards the search will run - on overall. + :arg failed: (required) The number of shards the operation or search + attempted to run on but failed. + :arg successful: (required) The number of shards the operation or + search succeeded on. + :arg total: (required) The number of shards the operation or search + will run on overall. :arg failures: :arg skipped: """ @@ -5753,7 +5754,7 @@ class SignificantLongTermsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "SignificantLongTermsBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class SignificantLongTermsBucket(AttrDict[Any]): @@ -5787,7 +5788,7 @@ class SignificantStringTermsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "SignificantStringTermsBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class SignificantStringTermsBucket(AttrDict[Any]): @@ -5922,7 +5923,7 @@ class StringRareTermsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "StringRareTermsBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class StringRareTermsBucket(AttrDict[Any]): @@ -5978,7 +5979,7 @@ class StringTermsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "StringTermsBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class StringTermsBucket(AttrDict[Any]): @@ -6083,7 +6084,7 @@ class TimeSeriesAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "TimeSeriesBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class TimeSeriesBucket(AttrDict[Any]): @@ -6150,7 +6151,7 @@ class UnmappedRareTermsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, Any]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class UnmappedSamplerAggregate(AttrDict[Any]): @@ -6181,7 +6182,7 @@ class UnmappedSignificantTermsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, Any]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class UnmappedTermsAggregate(AttrDict[Any]): @@ -6202,7 +6203,7 @@ class UnmappedTermsAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, Any]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class ValueCountAggregate(AttrDict[Any]): @@ -6232,7 +6233,7 @@ class VariableWidthHistogramAggregate(AttrDict[Any]): @property def buckets_as_dict(self) -> Mapping[str, "VariableWidthHistogramBucket"]: - return self.buckets # type: ignore + return self.buckets # type: ignore[return-value] class VariableWidthHistogramBucket(AttrDict[Any]): diff --git a/elasticsearch/dsl/utils.py b/elasticsearch/dsl/utils.py index fb5848e20..b52ec63a0 100644 --- a/elasticsearch/dsl/utils.py +++ b/elasticsearch/dsl/utils.py @@ -664,7 +664,7 @@ def merge( and isinstance(data[key], (AttrDict, collections.abc.Mapping)) and isinstance(value, (AttrDict, collections.abc.Mapping)) ): - merge(data[key], value, raise_on_conflict) # type: ignore + merge(data[key], value, raise_on_conflict) # type: ignore[arg-type] elif key in data and data[key] != value and raise_on_conflict: raise ValueError(f"Incompatible 
data for key {key!r}, cannot be merged.") else: diff --git a/elasticsearch/helpers/actions.py b/elasticsearch/helpers/actions.py index 687bf4b84..d1a43a8dc 100644 --- a/elasticsearch/helpers/actions.py +++ b/elasticsearch/helpers/actions.py @@ -593,7 +593,7 @@ def parallel_bulk( class BlockingPool(ThreadPool): def _setup_queues(self) -> None: - super()._setup_queues() # type: ignore + super()._setup_queues() # type: ignore[misc] # The queue must be at least the size of the number of threads to # prevent hanging when inserting sentinel values during teardown. self._inqueue: Queue[ diff --git a/examples/dsl/async/sparse_vectors.py b/examples/dsl/async/sparse_vectors.py index 4c0cfc1ac..64ae63245 100644 --- a/examples/dsl/async/sparse_vectors.py +++ b/examples/dsl/async/sparse_vectors.py @@ -20,7 +20,7 @@ Requirements: -$ pip install nltk tqdm elasticsearch[async] +$ pip install nltk tqdm "elasticsearch[async]" Before running this example, the ELSER v2 model must be downloaded and deployed to the Elasticsearch cluster, and an ingest pipeline must be defined. This can diff --git a/examples/dsl/async/vectors.py b/examples/dsl/async/vectors.py index 1dc183f53..15829df9a 100644 --- a/examples/dsl/async/vectors.py +++ b/examples/dsl/async/vectors.py @@ -20,7 +20,7 @@ Requirements: -$ pip install nltk sentence_transformers tqdm elasticsearch[async] +$ pip install nltk sentence_transformers tqdm "elasticsearch[async]" To run the example: diff --git a/examples/dsl/sparse_vectors.py b/examples/dsl/sparse_vectors.py index c328769eb..a92e82026 100644 --- a/examples/dsl/sparse_vectors.py +++ b/examples/dsl/sparse_vectors.py @@ -20,7 +20,7 @@ Requirements: -$ pip install nltk tqdm elasticsearch +$ pip install nltk tqdm "elasticsearch" Before running this example, the ELSER v2 model must be downloaded and deployed to the Elasticsearch cluster, and an ingest pipeline must be defined. 
This can diff --git a/examples/dsl/vectors.py b/examples/dsl/vectors.py index b4c700b71..3afd76991 100644 --- a/examples/dsl/vectors.py +++ b/examples/dsl/vectors.py @@ -20,7 +20,7 @@ Requirements: -$ pip install nltk sentence_transformers tqdm elasticsearch +$ pip install nltk sentence_transformers tqdm "elasticsearch" To run the example: diff --git a/noxfile.py b/noxfile.py index d25d4dc2b..83d749f57 100644 --- a/noxfile.py +++ b/noxfile.py @@ -66,9 +66,7 @@ def test_otel(session): @nox.session() def format(session): - session.install( - "black~=24.0", "isort", "flynt", "unasync>=0.6.0", "jinja2", "elastic-transport" - ) + session.install(".", "black~=24.0", "isort", "flynt", "unasync>=0.6.0", "jinja2") session.run("python", "utils/run-unasync.py") session.run("python", "utils/run-unasync-dsl.py") @@ -114,6 +112,7 @@ def lint(session): "--implicit-reexport", "--explicit-package-bases", "--show-error-codes", + "--enable-error-code=ignore-without-code", "elasticsearch/", ) session.run( @@ -128,12 +127,15 @@ def lint(session): "--show-error-codes", "test_elasticsearch/test_types/async_types.py", ) + + # check typing on the DSL examples session.run( "mypy", "--strict", "--implicit-reexport", "--explicit-package-bases", "--show-error-codes", + "--enable-error-code=ignore-without-code", "examples/dsl/", ) diff --git a/test_elasticsearch/test_dsl/test_connections.py b/test_elasticsearch/test_dsl/test_connections.py index dcaa59a98..b580f3e73 100644 --- a/test_elasticsearch/test_dsl/test_connections.py +++ b/test_elasticsearch/test_dsl/test_connections.py @@ -124,19 +124,19 @@ def test_connection_has_correct_user_agent() -> None: assert ( c.get_connection("testing") ._headers["user-agent"] - .startswith("elasticsearch-dsl-py/") + .startswith("elasticsearch-py/") ) my_client = Elasticsearch(hosts=["http://localhost:9200"]) my_client = my_client.options(headers={"user-agent": "my-user-agent/1.0"}) c.add_connection("default", my_client) - assert c.get_connection()._headers["user-agent"].startswith("elasticsearch-dsl-py/") + assert c.get_connection()._headers["user-agent"].startswith("elasticsearch-py/") my_client = Elasticsearch(hosts=["http://localhost:9200"]) assert ( c.get_connection(my_client) ._headers["user-agent"] - .startswith("elasticsearch-dsl-py/") + .startswith("elasticsearch-py/") ) not_a_client = object() diff --git a/utils/templates/aggs.py.tpl b/utils/templates/aggs.py.tpl index ab2103aa5..d4ba4f4cd 100644 --- a/utils/templates/aggs.py.tpl +++ b/utils/templates/aggs.py.tpl @@ -192,7 +192,7 @@ class AggBase(Generic[_R]): return cast("Pipeline[_R]", self._agg(False, name, agg_type, *args, **params)) def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]: - return BucketData(self, search, data) # type: ignore + return BucketData(self, search, data) # type: ignore[arg-type] class Bucket(AggBase[_R], Agg[_R]): diff --git a/utils/templates/query.py.tpl b/utils/templates/query.py.tpl index dd26287cf..6816f2d07 100644 --- a/utils/templates/query.py.tpl +++ b/utils/templates/query.py.tpl @@ -201,7 +201,7 @@ class {{ k.name }}({{ parent }}): functions = [] for name in ScoreFunction._classes: if name in kwargs: - functions.append({name: kwargs.pop(name)}) # type: ignore + functions.append({name: kwargs.pop(name)}) # type: ignore[arg-type] {% elif k.is_single_field %} if _field is not DEFAULT: kwargs[str(_field)] = _value diff --git a/utils/templates/response.__init__.py.tpl b/utils/templates/response.__init__.py.tpl index f9ae5c4ef..991249227 100644 --- 
diff --git a/utils/templates/response.__init__.py.tpl b/utils/templates/response.__init__.py.tpl
index f9ae5c4ef..991249227 100644
--- a/utils/templates/response.__init__.py.tpl
+++ b/utils/templates/response.__init__.py.tpl
@@ -168,9 +168,9 @@ class Response(AttrDict[Any], Generic[_R]):
         """
         if len(self.hits) == 0:
             raise ValueError("Cannot use search_after when there are no search results")
-        if not hasattr(self.hits[-1].meta, "sort"):  # type: ignore
+        if not hasattr(self.hits[-1].meta, "sort"):  # type: ignore[attr-defined]
             raise ValueError("Cannot use search_after when results are not sorted")
-        return self._search.extra(search_after=self.hits[-1].meta.sort)  # type: ignore
+        return self._search.extra(search_after=self.hits[-1].meta.sort)  # type: ignore[attr-defined]
 
 
 AggregateResponseType = {{ response["aggregate_type"] }}
@@ -189,7 +189,7 @@ class AggResponse(AttrDict[Any], Generic[_R]):
             # don't do self._meta['aggs'][attr_name] to avoid copying
             agg = self._meta["aggs"].aggs[attr_name]
             return cast(AggregateResponseType, agg.result(self._meta["search"], self._d_[attr_name]))
-        return super().__getitem__(attr_name)  # type: ignore
+        return super().__getitem__(attr_name)  # type: ignore[no-any-return]
 
     def __iter__(self) -> Iterator[AggregateResponseType]:  # type: ignore[override]
         for name in self._meta["aggs"]:
diff --git a/utils/templates/types.py.tpl b/utils/templates/types.py.tpl
index 776b8df4f..4ee80d5cb 100644
--- a/utils/templates/types.py.tpl
+++ b/utils/templates/types.py.tpl
@@ -98,7 +98,7 @@ class {{ k.name }}({{ k.parent if k.parent else "AttrDict[Any]" }}):
 
     @property
     def buckets_as_dict(self) -> Mapping[str, {{ k.buckets_as_dict }}]:
-        return self.buckets  # type: ignore
+        return self.buckets  # type: ignore[return-value]
     {% endif %}
     {% else %}
     pass

From e6e6b28a79eec6b224572caca41949f2dff9c147 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg
Date: Fri, 17 Jan 2025 10:40:54 +0000
Subject: [PATCH 10/11] 2nd round of feedback addressed

---
 elasticsearch/dsl/connections.py                | 10 ++++------
 test_elasticsearch/test_dsl/test_connections.py |  6 +++---
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/elasticsearch/dsl/connections.py b/elasticsearch/dsl/connections.py
index ff3243797..8acd80c6e 100644
--- a/elasticsearch/dsl/connections.py
+++ b/elasticsearch/dsl/connections.py
@@ -17,7 +17,7 @@
 
 from typing import Any, Dict, Generic, Type, TypeVar, Union
 
-from elasticsearch import Elasticsearch
+from elasticsearch import Elasticsearch, __versionstr__
 
 from .serializer import serializer
 
@@ -116,16 +116,14 @@ def get_connection(self, alias: Union[str, _T] = "default") -> _T:
         raise KeyError(f"There is no connection with alias {alias!r}.")
 
     def _with_user_agent(self, conn: _T) -> _T:
-        from elasticsearch import (
-            __versionstr__,  # this is here to avoid circular imports
-        )
-
         # try to inject our user agent
         if hasattr(conn, "_headers"):
             is_frozen = conn._headers.frozen
             if is_frozen:
                 conn._headers = conn._headers.copy()
-            conn._headers.update({"user-agent": f"elasticsearch-py/{__versionstr__}"})
+            conn._headers.update(
+                {"user-agent": f"elasticsearch-dsl-py/{__versionstr__}"}
+            )
             if is_frozen:
                 conn._headers.freeze()
         return conn
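The revert above keeps the DSL connection registry advertising itself as `elasticsearch-dsl-py`, with `__versionstr__` now imported at module level instead of inside the method. A rough usage sketch of that behaviour, assuming the module-level `add_connection`/`get_connection` helpers exported by `elasticsearch.dsl.connections`; the alias name and URL are illustrative only.

    from elasticsearch import Elasticsearch
    from elasticsearch.dsl import connections

    client = Elasticsearch(hosts=["http://localhost:9200"])
    connections.add_connection("default", client)

    # The registry stamps its own agent, e.g. "elasticsearch-dsl-py/<version>",
    # replacing whatever user-agent the client carried before registration.
    print(connections.get_connection("default")._headers["user-agent"])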
diff --git a/test_elasticsearch/test_dsl/test_connections.py b/test_elasticsearch/test_dsl/test_connections.py
index b580f3e73..dcaa59a98 100644
--- a/test_elasticsearch/test_dsl/test_connections.py
+++ b/test_elasticsearch/test_dsl/test_connections.py
@@ -124,19 +124,19 @@ def test_connection_has_correct_user_agent() -> None:
     assert (
         c.get_connection("testing")
         ._headers["user-agent"]
-        .startswith("elasticsearch-py/")
+        .startswith("elasticsearch-dsl-py/")
     )
 
     my_client = Elasticsearch(hosts=["http://localhost:9200"])
     my_client = my_client.options(headers={"user-agent": "my-user-agent/1.0"})
     c.add_connection("default", my_client)
-    assert c.get_connection()._headers["user-agent"].startswith("elasticsearch-py/")
+    assert c.get_connection()._headers["user-agent"].startswith("elasticsearch-dsl-py/")
 
     my_client = Elasticsearch(hosts=["http://localhost:9200"])
     assert (
         c.get_connection(my_client)
         ._headers["user-agent"]
-        .startswith("elasticsearch-py/")
+        .startswith("elasticsearch-dsl-py/")
     )
 
     not_a_client = object()

From e5d03586ce0ec217d012a41bac552fc0d3e1d26b Mon Sep 17 00:00:00 2001
From: Miguel Grinberg
Date: Fri, 17 Jan 2025 10:56:58 +0000
Subject: [PATCH 11/11] fix coverage reports

---
 noxfile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/noxfile.py b/noxfile.py
index 83d749f57..d3b8544aa 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -46,7 +46,7 @@ def pytest_argv():
 
 @nox.session(python=["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"])
 def test(session):
-    session.install(".[dev]", env=INSTALL_ENV, silent=False)
+    session.install("-e", ".[dev]", env=INSTALL_ENV, silent=False)
 
     session.run(*pytest_argv(), *session.posargs)
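The final hunk installs the package in editable mode so that coverage is attributed to the files in the working tree rather than to the copy in site-packages. A sketch of the same idea in a throwaway nox session; the session name and the `pytest --cov` invocation are illustrative and assume pytest-cov is available.

    import nox


    @nox.session(python=["3.12"])
    def test_sketch(session: nox.Session) -> None:
        # "-e" installs the checked-out sources, so coverage paths point at
        # ./elasticsearch/... instead of the installed copy.
        session.install("-e", ".[dev]", silent=False)
        session.run("pytest", "--cov=elasticsearch", *session.posargs)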