Skip to content

Commit

Permalink
Rewrite integration tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ndrluis committed Jan 15, 2025
1 parent 6e2070d commit ba64764
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 32 deletions.
24 changes: 0 additions & 24 deletions dev/provision.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,27 +401,3 @@
)
spark.sql(f"ALTER TABLE {catalog_name}.default.test_empty_scan_ordered_str WRITE ORDERED BY id")
spark.sql(f"INSERT INTO {catalog_name}.default.test_empty_scan_ordered_str VALUES 'a', 'c'")

spark.sql(
f"""
CREATE OR REPLACE TABLE {catalog_name}.default.test_table_statistics_operations (
number integer
)
USING iceberg
TBLPROPERTIES (
'format-version'='2'
);
"""
)
spark.sql(
f"""
INSERT INTO {catalog_name}.default.test_table_statistics_operations
VALUES (1)
"""
)
spark.sql(
f"""
INSERT INTO {catalog_name}.default.test_table_statistics_operations
VALUES (2)
"""
)
37 changes: 29 additions & 8 deletions tests/integration/test_statistics_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,45 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import TYPE_CHECKING

import pytest

from pyiceberg.catalog import Catalog
from pyiceberg.exceptions import NoSuchTableError
from pyiceberg.table.statistics import BlobMetadata, StatisticsFile

if TYPE_CHECKING:
import pyarrow as pa

from pyiceberg.catalog import Catalog
from pyiceberg.schema import Schema
from pyiceberg.table import Table


def _create_table_with_schema(catalog: "Catalog", schema: "Schema") -> "Table":
    """Recreate the statistics test table from scratch and return it.

    The table ``default.test_table_statistics_operations`` is dropped first so
    that each caller starts from a fresh table; a ``NoSuchTableError`` raised
    by the drop is ignored.

    Args:
        catalog: Catalog in which the table is dropped and then created.
        schema: Schema passed through unchanged to ``catalog.create_table``.

    Returns:
        The value returned by ``catalog.create_table`` for the new table.
    """
    identifier = "default.test_table_statistics_operations"
    try:
        catalog.drop_table(identifier)
    except NoSuchTableError:
        # Best-effort cleanup: there was no earlier table to remove.
        pass
    table = catalog.create_table(identifier=identifier, schema=schema)
    return table


@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_manage_statistics(catalog: Catalog) -> None:
identifier = "default.test_table_statistics_operations"
tbl = catalog.load_table(identifier)
def test_manage_statistics(catalog: "Catalog", arrow_table_with_null: "pa.Table") -> None:
tbl = _create_table_with_schema(catalog, arrow_table_with_null.schema)

tbl.append(arrow_table_with_null)
tbl.append(arrow_table_with_null)

add_snapshot_id_1 = tbl.history()[0].snapshot_id
add_snapshot_id_2 = tbl.history()[1].snapshot_id

def create_statistics_file(snapshot_id: int) -> StatisticsFile:
def create_statistics_file(snapshot_id: int, type_name: str) -> StatisticsFile:
blob_metadata = BlobMetadata(
type="boring-type",
type=type_name,
snapshot_id=snapshot_id,
sequence_number=2,
fields=[1],
Expand All @@ -48,8 +69,8 @@ def create_statistics_file(snapshot_id: int) -> StatisticsFile:

return statistics_file

statistics_file_snap_1 = create_statistics_file(add_snapshot_id_1)
statistics_file_snap_2 = create_statistics_file(add_snapshot_id_2)
statistics_file_snap_1 = create_statistics_file(add_snapshot_id_1, "apache-datasketches-theta-v1")
statistics_file_snap_2 = create_statistics_file(add_snapshot_id_2, "deletion-vector-v1")

with tbl.update_statistics() as update:
update.set_statistics(add_snapshot_id_1, statistics_file_snap_1)
Expand Down

0 comments on commit ba64764

Please sign in to comment.