diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a6bd5d..0c634e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,14 @@ # Changelog -## 0.2.2 +## 1.0.0 + +### New + +* Describe the Met Office Historic Station metadata in datapackage.json. [Ben Dalling] + + +## 0.2.2 (2024-06-08) ### Changes diff --git a/Makefile b/Makefile index 8f590dd..63be7e7 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ .EXPORT_ALL_VARIABLES: -GIT_TAG = 0.2.2 +GIT_TAG = 1.0.0 all: lint build test diff --git a/requirements.txt b/requirements.txt index 89a4a84..cc79707 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +attrs==23.2.0 bandit==1.7.8 certifi==2024.6.2 charset-normalizer==3.3.2 @@ -11,6 +12,8 @@ gitchangelog==3.0.4 idna==3.7 iniconfig==2.0.0 isort==5.13.2 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 Mako==1.3.5 mando==0.7.1 markdown-it-py==3.0.0 @@ -38,8 +41,10 @@ python-dateutil==2.9.0.post0 pytz==2024.1 PyYAML==6.0.1 radon==6.0.1 +referencing==0.35.1 requests==2.32.3 rich==13.7.1 +rpds-py==0.18.1 setuptools==70.0.0 six==1.16.0 smart-open==7.0.4 diff --git a/tests/common/features/json-schema.feature b/tests/common/features/json-schema.feature new file mode 100644 index 0000000..8b29edb --- /dev/null +++ b/tests/common/features/json-schema.feature @@ -0,0 +1,11 @@ +Feature: Test Data Package Files Against the Relevant JSON Schema + + Scenario Outline: Data Packages + Given the JSON Schema URL of <json_schema_url> + And the Data Package File of <data_package_file_name> + When the Data Package File Exists + Then the Data Package File Validates Against the Schema + + Examples: + | json_schema_url | data_package_file_name | + | https://specs.frictionlessdata.io/schemas/tabular-data-package.json | uk/gov/metoffice/historic_station_data/datapackage.json | diff --git a/tests/common/step_defs/test_json_schema.py b/tests/common/step_defs/test_json_schema.py new file mode 100644 index 0000000..bdd0c72 --- /dev/null +++ b/tests/common/step_defs/test_json_schema.py @@ -0,0 +1,56 @@ +"""Test 
Data Package Files Against the Relevant JSON Schema feature tests.""" + +import json +import logging + +from jsonschema import validate +from jsonschema.exceptions import ValidationError +from pytest_bdd import given, parsers, scenario, then, when +from smart_open import open + + +@scenario('../features/json-schema.feature', 'Data Packages') +def test_data_packages(): + """Data Packages.""" + + +@given(parsers.parse('the Data Package File of {data_package_file_name}'), target_fixture='data_package') +def _(data_package_file_name: str) -> dict: + """the Data Package File of <data_package_file_name> (parsed JSON; None if the file is not found).""" + try: + with open(data_package_file_name) as stream: + data_package = json.load(stream) + except FileNotFoundError: + logging.error(f'File not found "{data_package_file_name}".') + return None + + return data_package + + +@given(parsers.parse('the JSON Schema URL of {json_schema_url}'), target_fixture='json_schema') +def _(json_schema_url: str) -> dict: + """the JSON Schema URL of <json_schema_url> (opened via smart_open and parsed as JSON).""" + with open(json_schema_url) as stream: + json_schema = json.load(stream) + + return json_schema + + +@when('the Data Package File Exists') +def _(data_package: dict) -> None: + """the Data Package File Exists.""" + assert data_package is not None, 'Data package not available, see error message.' + + +@then('the Data Package File Validates Against the Schema') +def _(data_package: dict, json_schema: dict) -> None: + """the Data Package File Validates Against the Schema (fails with the ValidationError message).""" + is_valid = False + + try: + validate(instance=data_package, schema=json_schema) + is_valid = True + except ValidationError as ex: + message = ex.message + + assert is_valid, message diff --git a/uk/gov/metoffice/historic_station_data/README.md b/uk/gov/metoffice/historic_station_data/README.md index a0bc9fa..c00ae8a 100644 --- a/uk/gov/metoffice/historic_station_data/README.md +++ b/uk/gov/metoffice/historic_station_data/README.md @@ -3,20 +3,18 @@ A dataset wrangled from the UK Met Office and available at . 
-## Fields +The wrangling consists of: -1. station_name: The name of the station that the data refers to. -1. metadata: Free text associated with the station. Will be blank if the row is for a data record. -1. month: The [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) month (e.g. 2024-04). Will be blank if the row is a data record. -1. tmax: Mean daily maximum temperature (centigrade). Will be blank if month is blank or no data is available. -1. tmax_is_estimated: True if tmax is estimated, false if is estimated, blank if tmax or month is absent. -1. tmin: Mean daily minimum temperature (centigrade). Will be blank if month is blank or no data is available. -1. tmin_is_estimated: True if tmin is estimated, false if is not estimated, blank if tmin or month is absent. -1. af: Days of air frost. Will be blank if month is blank for no data is available. -1. af_is_estimated: True if af is estimated, false if is not estimated, blank if af or main is absent. -1. rain: Rainfall (mm). Will be blank if month is blank or no rain data is available. -1. rain_is_estimated: True if rain is estimated, false if is not estimated, blank if rain or main is absent. -1. sun: The number of hours of sunshine. Will be blank if month is blank or no sun data is available. -1. sun_is_estimated: True if sun is estimated, false if is not estimated, blank if sun or main is absent. -1. sun_instrument: The sun instrument (blank if unavailable). Will either be Campbell-Stokes recorder or Campbell-Stokes recorder. -1. provisional: Data are indicated as provisional until the full network quality control has been carried out. After this, data are final. +- Cleaning some data errors. +- Setting the sun instrument type to the last known instrument for a station. 
+ +The original data is licensed under the +[Open Government Licence 3.0 (United Kingdom)](https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/) +and in this wrangled form is covered by the +[Creative Commons Attribution 4.0](https://creativecommons.org/licenses/by/4.0/) +licence. + +For details on the fields, see the `datapackage.json` file which follows the +format of the +[Tabular Data Package](https://specs.frictionlessdata.io/tabular-data-package/) +specification. diff --git a/uk/gov/metoffice/historic_station_data/datapackage.json b/uk/gov/metoffice/historic_station_data/datapackage.json new file mode 100644 index 0000000..8356c4f --- /dev/null +++ b/uk/gov/metoffice/historic_station_data/datapackage.json @@ -0,0 +1,186 @@ +{ + "name": "io.cbdq.metoffice.historic_station_data", + "title": "Met Office Historical Station Data", + "description": "# Met Office Historical Station Data\n\nA dataset wrangled from the UK Met Office and available at\n<https://www.metoffice.gov.uk/research/climate/maps-and-data/historic-station-data>.\n\nThe wrangling consists of:\n\n- Cleaning some data errors.\n- Setting the sun instrument type to the last known instrument for a station.", + "profile": "tabular-data-package", + "licenses": [ + { + "name": "OGL-UK-3.0", + "path": "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/", + "title": "Open Government Licence 3.0 (United Kingdom)" + }, + { + "name": "CC-BY-4.0", + "path": "https://creativecommons.org/licenses/by/4.0/", + "title": "Creative Commons Attribution 4.0" + } + ], + "homepage": "https://github.com/cbdq-io/datasets/tree/main/uk/gov/metoffice/historic_station_data", + "sources": [ + { + "title": "Met Office Historical Station Data", + "path": "https://www.metoffice.gov.uk/research/climate/maps-and-data/historic-station-data" + } + ], + "contributors": [ + { + "title": "Cloud Based DQ Ltd.", + "path": "https://www.cbdq.io", + "role": "wrangler" + } + ], + "keywords": [ + "climate", + "MetOffice", + "change", + "historic" + ], + "image": 
"https://upload.wikimedia.org/wikipedia/en/thumb/f/f4/Met_Office.svg/150px-Met_Office.svg.png", + "resources": [ + { + "profile": "tabular-data-resource", + "name": "historic_station_data", + "path": "data/historic-station-data.csv", + "schema": { + "missingValues": [""], + "fields": [ + { + "name": "station_name", + "type": "string", + "description": "The name of the station that the data refers to.", + "constraints": { + "enum": [ + "aberporth", + "armagh", + "ballypatrick", + "bradford", + "braemar", + "camborne", + "cambridge", + "cardiff", + "chivenor", + "cwmystwyth", + "dunstaffnage", + "durham", + "eastbourne", + "eskdalemuir", + "heathrow", + "hurn", + "lerwick", + "leuchars", + "lowestoft", + "manston", + "nairn", + "newtonrigg", + "oxford", + "paisley", + "ringway", + "rossonwye", + "shawbury", + "sheffield", + "southampton", + "stornoway", + "suttonbonington", + "tiree", + "valley", + "waddington", + "whitby", + "wickairport", + "yeovilton" + ] + } + }, + { + "name": "metadata", + "type": "string", + "description": "Free text associated with the station. Will be missing if the row is for a data record." + }, + { + "name": "month", + "type": "yearmonth", + "description": "The ISO 8601 month. Will be missing if the row is a metadata record.", + "example": "2024-04" + }, + { + "name": "tmax", + "type": "number", + "description": "Mean daily maximum temperature (centigrade)." + }, + { + "name": "tmax_is_estimated", + "type": "boolean", + "trueValues": ["True"], + "falseValues": ["False"], + "description": "True if tmax is estimated." + }, + { + "name": "tmin", + "type": "number", + "description": "Mean daily minimum temperature (centigrade)." + }, + { + "name": "tmin_is_estimated", + "type": "boolean", + "trueValues": ["True"], + "falseValues": ["False"], + "description": "True if tmin is estimated." + }, + { + "name": "af", + "type": "integer", + "description": "Days of air frost." 
+ }, + { + "name": "af_is_estimated", + "type": "boolean", + "trueValues": ["True"], + "falseValues": ["False"], + "description":"True if af is estimated." + }, + { + "name": "rain", + "type": "number", + "description": "Total rainfall (mm)." + }, + { + "name": "rain_is_estimated", + "type": "boolean", + "trueValues": ["True"], + "falseValues": ["False"], + "description": "True if rain is estimated." + }, + { + "name": "sun", + "type": "number", + "description": "Total sunshine duration (hours)." + }, + { + "name": "sun_is_estimated", + "type": "boolean", + "trueValues": ["True"], + "falseValues": ["False"], + "description": "True if sun is estimated." + }, + { + "name": "sun_instrument", + "type": "string", + "description": "The sun instrument installed at the station.", + "constraints": { + "enum": [ + "Campbell Stokes recorder", + "Kipp & Zonen sensor" + ] + } + }, + { + "name": "provisional", + "type": "boolean", + "trueValues": ["True"], + "falseValues": ["False"], + "description": "Data are indicated as provisional until the full network quality control has been carried out. After this, data are final." + } + ] + } + } + ] +}