Skip to content

Commit

Permalink
Support timestamp characteristics
Browse files Browse the repository at this point in the history
  • Loading branch information
marijababi committed Jun 4, 2024
1 parent 4f32272 commit b857f9a
Show file tree
Hide file tree
Showing 2 changed files with 210 additions and 1 deletion.
78 changes: 77 additions & 1 deletion src/volur/sdk/v1alpha2/sources/csv/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from datetime import datetime
from typing import Any, AsyncIterator, Literal

from google.type.date_pb2 import Date
from google.type.date_pb2 import Date, Datetime
from volur.pork.materials.v1alpha3 import material_pb2
from volur.pork.products.v1alpha3 import product_pb2
from volur.pork.shared.v1alpha1 import characteristic_pb2, quantity_pb2
Expand Down Expand Up @@ -374,3 +374,79 @@ def get_value(
raise ValueError(
f"provided value {_} in column {self.column_id} can not be interpreted as date characteristic" # noqa: E501
)


@dataclass
class CharacteristicColumnTimestamp(CharacteristicColumn):
    """Characteristic column whose string cells are parsed as timestamps.

    A cell is matched against every entry of ``timestamp_formats`` in order
    using ``datetime.strptime``; the first format that parses wins.

    Attributes are documented inline below. ``extra_timestamp_formats`` is
    the only field of this class that is accepted by the generated
    ``__init__``; the other two are populated automatically.
    """

    # Effective, ordered list of formats tried by ``get_value``.
    # Built in ``__post_init__`` as defaults followed by the extras.
    timestamp_formats: list[str] = field(init=False)
    # Built-in formats: four date layouts combined with 24-hour, 12-hour
    # (AM/PM) and fractional-second time layouts. Entries are unique; the
    # relative order of the original list is preserved.
    default_timestamp_format: list[str] = field(
        default_factory=lambda: [
            "%d-%m-%Y %H:%M:%S",
            "%Y-%m-%d %H:%M:%S",
            "%Y/%m/%d %H:%M:%S",
            "%d/%m/%Y %H:%M:%S",
            "%d-%m-%Y %I:%M:%S %p",
            "%Y-%m-%d %I:%M:%S %p",
            "%Y/%m/%d %I:%M:%S %p",
            "%d/%m/%Y %I:%M:%S %p",
            "%d-%m-%Y %H:%M:%S.%f",
            "%Y-%m-%d %H:%M:%S.%f",
            "%Y/%m/%d %H:%M:%S.%f",
            "%d/%m/%Y %H:%M:%S.%f",
        ],
        init=False,
    )
    # Caller-supplied formats, tried after all the defaults.
    extra_timestamp_formats: list[str] = field(default_factory=list)

    def __post_init__(
        self: "CharacteristicColumnTimestamp",
        column_name: str | int,
        characteristic_name: str,
    ) -> None:
        """Run the parent initialisation and assemble ``timestamp_formats``.

        Args:
            column_name: Forwarded unchanged to the parent ``__post_init__``.
            characteristic_name: Forwarded unchanged to the parent
                ``__post_init__``.
        """
        super().__post_init__(column_name, characteristic_name)
        self.timestamp_formats = [
            *self.default_timestamp_format,
            *self.extra_timestamp_formats,
        ]

    def get_value(
        self: "CharacteristicColumnTimestamp",
        data: dict[str | int, Any],
    ) -> characteristic_pb2.Characteristic:
        """Build the characteristic for this column from one row of data.

        Args:
            data: Row mapping; the cell is looked up under ``self.column_id``.

        Returns:
            A ``Characteristic`` named ``self.characteristic_id``. Its value
            is an empty ``CharacteristicValue`` when the cell is missing,
            ``None`` or ``""``; otherwise it carries the parsed timestamp.

        Raises:
            ValueError: If the cell is a string that matches none of
                ``timestamp_formats``, or if the cell is not a string.
        """
        raw_value = data.get(self.column_id, None)
        if raw_value is None or raw_value == "":
            return characteristic_pb2.Characteristic(
                name=self.characteristic_id,
                value=characteristic_pb2.CharacteristicValue(),
            )

        if not isinstance(raw_value, str):
            raise ValueError(
                f"provided value {raw_value} in column {self.column_id} can not be interpreted as date characteristic"  # noqa: E501
            )

        for timestamp_format in self.timestamp_formats:
            # Only the parse itself may fail with ValueError for a format
            # mismatch; keep message construction outside the try so that
            # unrelated ValueErrors are not silently treated as "no match".
            try:
                parsed_timestamp = datetime.strptime(raw_value, timestamp_format)
            except ValueError:
                continue

            # NOTE(review): `Datetime` is imported from google.type.date_pb2
            # and stored in the `value_date` field - confirm that this module
            # really exports `Datetime` and that `value_date` accepts it.
            # Fractional seconds accepted by the "%f" formats are parsed but
            # not forwarded; only whole seconds reach the message.
            return characteristic_pb2.Characteristic(
                name=self.characteristic_id,
                value=characteristic_pb2.CharacteristicValue(
                    value_date=Datetime(
                        year=parsed_timestamp.year,
                        month=parsed_timestamp.month,
                        day=parsed_timestamp.day,
                        hours=parsed_timestamp.hour,
                        minutes=parsed_timestamp.minute,
                        seconds=parsed_timestamp.second,
                    )
                ),
            )

        raise ValueError(
            f"provided value {raw_value} in column {self.column_id} has invalid date format"  # noqa: E501
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
from typing import Any

import pytest
from google.type.date_pb2 import Datetime
from volur.pork.shared.v1alpha1 import characteristic_pb2
from volur.sdk.v1alpha2.sources.csv import CharacteristicColumnDate

# One id per entry of ``test_data``, in the same order. pytest requires the
# two lists to have the same length, so add an id whenever a case is added.
ids = [
    "return-correct-characteristic-for-a-correct-date-value-in-column1",
    "return-correct-characteristic-for-a-correct-date-value-in-column2",
    "return-correct-characteristic-for-a-correct-date-value-in-column3",
    "return-empty-chracteristic-because-value-in-column-is-none",
    "raise-exception-because-value-is-not-valid-date",
    "raise-exception-because-value-is-not-string",
]

# Each case is a tuple of:
#   (row data, column under test, expected characteristic or None,
#    whether get_value must raise, expected exception text or None)
#
# NOTE(review): every case builds a `CharacteristicColumnDate`, yet the
# inputs are timestamps and the expected values are `Datetime` messages.
# This looks like it was meant to exercise the timestamp column - confirm.
test_data = [
    # 24-hour timestamp, day-first with dashes.
    (
        {
            "column_name": "15-06-2021 15:30:34",
        },
        CharacteristicColumnDate(
            column_name="column_name",
            characteristic_name="characteristic_name",
        ),
        characteristic_pb2.Characteristic(
            name="characteristic_name",
            value=characteristic_pb2.CharacteristicValue(
                value_date=Datetime(
                    year=2021, month=6, day=15, hours=15, minutes=30, seconds=34
                ),
            ),
        ),
        False,
        None,
    ),
    # 12-hour timestamp with AM/PM marker, day-first with slashes.
    (
        {
            "column_name": "10/06/2018 03:30:15 PM",
        },
        CharacteristicColumnDate(
            column_name="column_name",
            characteristic_name="characteristic_name",
        ),
        characteristic_pb2.Characteristic(
            name="characteristic_name",
            value=characteristic_pb2.CharacteristicValue(
                value_date=Datetime(
                    year=2018, month=6, day=10, hours=15, minutes=30, seconds=15
                ),
            ),
        ),
        False,
        None,
    ),
    # Timestamp with fractional seconds; the expected value carries only
    # whole seconds.
    (
        {
            "column_name": "10/06/2018 03:10:34.45",
        },
        CharacteristicColumnDate(
            column_name="column_name",
            characteristic_name="characteristic_name",
        ),
        characteristic_pb2.Characteristic(
            name="characteristic_name",
            value=characteristic_pb2.CharacteristicValue(
                value_date=Datetime(
                    year=2018, month=6, day=10, hours=3, minutes=10, seconds=34
                ),
            ),
        ),
        False,
        None,
    ),
    # Empty cell yields a characteristic with an empty value.
    (
        {
            "column_name": "",
        },
        CharacteristicColumnDate(
            column_name="column_name",
            characteristic_name="characteristic_name",
        ),
        characteristic_pb2.Characteristic(
            name="characteristic_name",
            value=characteristic_pb2.CharacteristicValue(),
        ),
        False,
        None,
    ),
    # String that matches no supported format.
    (
        {
            "column_name": "01-01-202",
        },
        CharacteristicColumnDate(
            column_name="column_name",
            characteristic_name="characteristic_name",
        ),
        None,
        True,
        "provided value 01-01-202 in column column_name has invalid date format",
    ),
    # Non-string cell.
    (
        {
            "column_name": 2020,
        },
        CharacteristicColumnDate(
            column_name="column_name",
            characteristic_name="characteristic_name",
        ),
        None,
        True,
        "provided value 2020 in column column_name can not be interpreted as date characteristic",  # noqa: E501
    ),
]


@pytest.mark.parametrize(
    argnames=[
        "data",
        "column",
        "expected",
        "should_raise_an_exception",
        "exception_text",
    ],
    argvalues=test_data,
    ids=ids,
)
def test_characteristic_column_date(
    data: dict[str | int, Any],
    column: CharacteristicColumnDate,
    expected: characteristic_pb2.Characteristic | None,
    should_raise_an_exception: bool,
    exception_text: str | None,
) -> None:
    """Check ``column.get_value(data)`` against one parametrized case.

    Args:
        data: Row mapping handed to ``get_value``.
        column: Column instance under test.
        expected: Characteristic that ``get_value`` must return; ``None`` for
            cases that are expected to raise.
        should_raise_an_exception: Whether ``get_value`` must raise
            ``ValueError`` for this case.
        exception_text: Pattern the exception message must match (passed to
            ``pytest.raises`` as ``match``); ``None`` for non-raising cases.
    """
    if should_raise_an_exception:
        # The return value is irrelevant here; only the raised error matters.
        with pytest.raises(ValueError, match=exception_text):
            column.get_value(data)
        return

    assert column.get_value(data) == expected

0 comments on commit b857f9a

Please sign in to comment.