Skip to content

Commit

Permalink
Subclass column meta (#468)
Browse files Browse the repository at this point in the history
* make it possible to subclass column meta

* update

* update documentation

* remove extra notebook field in documentation
  • Loading branch information
nanne-aben authored Aug 12, 2024
1 parent 32c4abf commit f022d18
Show file tree
Hide file tree
Showing 21 changed files with 144 additions and 26 deletions.
1 change: 1 addition & 0 deletions docs/source/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Advanced Topics
.. toctree::

subclassing_schemas
subclass_column_meta
column_ambiguity
advanced_linting_support
dataset_implements
2 changes: 1 addition & 1 deletion docs/source/advanced_linting_support.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/autocomplete_in_notebooks.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/column_ambiguity.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/complex_datatypes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/create_empty_datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/create_schema_in_notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/dataset_implements.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/documentation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
16 changes: 8 additions & 8 deletions docs/source/loading_datasets_in_notebooks.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"id": "acfd55af",
"metadata": {},
"outputs": [],
Expand All @@ -327,7 +327,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "b0491ad1",
"metadata": {},
"outputs": [],
Expand All @@ -337,7 +337,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"id": "f7f41731",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -367,7 +367,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 16,
"id": "264c2957",
"metadata": {},
"outputs": [
Expand All @@ -385,7 +385,7 @@
" age: Column[LongType]"
]
},
"execution_count": 13,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -408,7 +408,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 17,
"id": "7199bee0",
"metadata": {},
"outputs": [
Expand All @@ -426,7 +426,7 @@
" age: Column[LongType]"
]
},
"execution_count": 14,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -459,7 +459,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/schema_attributes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/structtype_columns.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/structtypes_in_notebooks.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
85 changes: 85 additions & 0 deletions docs/source/subclass_column_meta.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Defining your own ColumnMeta attributes\n",
"\n",
"In this notebook, we will see how to define your own `ColumnMeta` attributes. This is useful when you want to add some metadata to your columns that are not already defined in the `ColumnMeta` class."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'id': {'comment': 'Identifies the person', 'primary_key': True},\n",
" 'name': {},\n",
" 'age': {}}"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from dataclasses import dataclass\n",
"from typing import Annotated\n",
"from pyspark.sql.types import LongType, StringType\n",
"from typedspark import ColumnMeta, Schema\n",
"from typedspark._core.column import Column\n",
"\n",
"\n",
"@dataclass\n",
"class MyColumnMeta(ColumnMeta):\n",
" primary_key: bool = False\n",
"\n",
"\n",
"class Persons(Schema):\n",
" id: Annotated[\n",
" Column[LongType],\n",
" MyColumnMeta(\n",
" comment=\"Identifies the person\",\n",
" primary_key=True,\n",
" ),\n",
" ]\n",
" name: Column[StringType]\n",
" age: Column[LongType]\n",
"\n",
"\n",
"Persons.get_metadata()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "typedspark",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 1 addition & 1 deletion docs/source/subclassing_schemas.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/transforming_datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/type_checking.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion docs/videos/ide.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
29 changes: 28 additions & 1 deletion tests/_core/test_column.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from dataclasses import dataclass
from typing import Annotated

import pandas as pd
import pytest
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit
from pyspark.sql.types import LongType, StringType

from typedspark import Column, Schema
from typedspark import Column, ColumnMeta, Schema
from typedspark._utils.create_dataset import create_partially_filled_dataset


Expand Down Expand Up @@ -41,3 +44,27 @@ def test_column_reference_without_spark_session():
def test_column_with_deprecated_dataframe_param(spark: SparkSession):
df = create_partially_filled_dataset(spark, A, {A.a: [1, 2, 3]})
Column("a", dataframe=df)


@dataclass
class MyColumnMeta(ColumnMeta):
    """``ColumnMeta`` subclass used to check that extra fields reach the metadata."""

    # Custom attribute on top of the inherited ``comment``; off unless set explicitly.
    primary_key: bool = False


class Persons(Schema):
    # ``id`` is the only column carrying metadata; it sets both the inherited
    # ``comment`` and the custom ``primary_key`` attribute.
    id: Annotated[
        Column[LongType],
        MyColumnMeta(primary_key=True, comment="Identifies the person"),
    ]
    # Plain columns without any ``ColumnMeta`` attached.
    name: Column[StringType]
    age: Column[LongType]


def test_get_metadata():
    """Checks that ``Persons.get_metadata()`` exposes custom ``ColumnMeta`` fields."""
    # Compare the complete mapping so that un-annotated columns are covered as well,
    # not only the column that carries a ``MyColumnMeta``.
    assert Persons.get_metadata() == {
        "id": {"comment": "Identifies the person", "primary_key": True},
        # Columns without a ``ColumnMeta`` annotation report empty metadata.
        "name": {},
        "age": {},
    }
5 changes: 3 additions & 2 deletions typedspark/_core/column_meta.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Metadata for ``Column`` objects that can be accessed during runtime."""

from dataclasses import dataclass
from dataclasses import asdict, dataclass
from typing import Dict, Optional


Expand All @@ -23,4 +23,5 @@ class A(Schema):

def get_metadata(self) -> Optional[Dict[str, str]]:
    """Returns the metadata of this column.

    Every dataclass field whose value is not ``None`` is included, so attributes
    added by subclasses show up next to ``comment``. Returns ``None`` when no
    field is set.
    """
    # NOTE(review): values follow the field types, so a subclass may yield non-string
    # values (e.g. a bool) despite the ``Dict[str, str]`` annotation.
    metadata = {name: value for name, value in asdict(self).items() if value is not None}
    # An empty dict is falsy, so "nothing set" collapses to ``None``.
    return metadata or None
4 changes: 4 additions & 0 deletions typedspark/_schema/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ def get_schema_name(cls):
"""Returns the name with which the schema was initialized."""
return cls._original_name if cls._original_name else cls.__name__

def get_metadata(cls) -> dict[str, dict[str, Any]]:
    """Returns a mapping from each column name in the schema to that column's metadata."""
    metadata_per_column = {}
    # Walk the fields of the schema's struct type in their declared order.
    for struct_field in cls.get_structtype().fields:
        metadata_per_column[struct_field.name] = struct_field.metadata
    return metadata_per_column


class Schema(Protocol, metaclass=MetaSchema):
# pylint: disable=empty-docstring
Expand Down

0 comments on commit f022d18

Please sign in to comment.