Add possibility to get SQL migrations based on git commit hashes
jjmurre committed Dec 1, 2023
1 parent 765afdb commit 2c786ad
Showing 5 changed files with 79 additions and 8 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
@@ -1,3 +1,8 @@
# 2023-12-01 (5.18.0)

* Add possibility to use git commit hashes when creating SQL migrations
from amsterdam schema table definitions.

# 2023-11-24 (5.17.18)

* Bugfix: Update nested table when nested field name has underscore.
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -25,5 +25,7 @@ exclude = '''
| dist
)/
'''
[tool.bandit]
skips = ["B101", "B404"]

github_url = "https://github.com/Amsterdam/schema-tools"
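(Context: B101 is bandit's `assert_used` check and B404 flags importing `subprocess`; skipping them presumably accommodates the `assert` statements and the `subprocess` usage this commit introduces.)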
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = amsterdam-schema-tools
version = 5.18.0
url = https://github.com/amsterdam/schema-tools
license = Mozilla Public 2.0
author = Team Data Diensten, van het Dataplatform onder de Directie Digitale Voorzieningen (Gemeente Amsterdam)
@@ -1,6 +1,10 @@
from __future__ import annotations

import os
import subprocess
import tempfile
from collections import deque
from pathlib import Path

from django.apps import apps
from django.conf import settings
@@ -27,7 +31,14 @@ class Command(BaseCommand):
./manage.py sqlmigrate_schema -v3 meetbouten meetbouten v1.0.0 v1.1.0
or, using the schemas from the local filesystem and getting the
older version of a schema from a git commit hash:

./manage.py sqlmigrate_schema -v3 meetbouten meetbouten \
    7d986c96:../amsterdam-schema/datasets/meetbouten/dataset.json \
    ../amsterdam-schema/datasets/meetbouten/dataset.json \
    --from-files

The command is sped up by pointing ``SCHEMA_URL`` or ``--schema-url``
to a local filesystem repository of the schema files. Otherwise it downloads
the current schemas from the default remote repository.
"""
@@ -46,15 +57,24 @@ def add_arguments(self, parser: CommandParser) -> None:
default=DEFAULT_DB_ALIAS,
help='Nominates a database to create SQL for. Defaults to the "default" database.',
)
parser.add_argument(
"--from-files",
action="store_true",
help="Get the tables from a file. NB. the SCHEMA_URL also needs to be file-based!",
)
parser.add_argument("schema", help="Schema name")
parser.add_argument("table", help="Table name")
# Currently, both the old and the new version need to be given.
# There is no way yet to retrieve a listing of available table versions.
parser.add_argument(
"version1", metavar="OLDVERSION", help="Old table version, e.g. v1.0.0"
"version1",
metavar="OLDVERSION",
help="Old table version, e.g. v1.0.0, or `path-to-dataset-json` with --from-files",
)
parser.add_argument(
"version2", metavar="NEWVERSION", help="New table version, e.g. v1.1.0"
"version2",
metavar="NEWVERSION",
help="New table version, e.g. v1.1.0, , or `path-to-dataset-json` with --from-files",
)

def handle(self, *args, **options) -> None:
@@ -67,8 +87,19 @@ def handle(self, *args, **options) -> None:

# Load the data from the schema repository
dataset = self._load_dataset(options["schema"])
if options["from_files"]:
assert not options["schema_url"].startswith(
"http"
), "--from-files only works with a SCHEMA_URL on the local filesystem."
table1 = self._load_table_version_from_file(
dataset.id, options["table"], self._checkout_file_if_needed(options["version1"])
)
table2 = self._load_table_version_from_file(
dataset.id, options["table"], self._checkout_file_if_needed(options["version2"])
)
else:
table1 = self._load_table_version(dataset, options["table"], options["version1"])
table2 = self._load_table_version(dataset, options["table"], options["version2"])
real_apps = self._load_dependencies(dataset)
dummy_dataset = self._get_dummy_dataset_model(dataset)

@@ -129,6 +160,37 @@ def _load_table_version(

raise CommandError(f"Table version '{table_id}/{version}' does not exist.") from e

def _checkout_file_if_needed(self, file_path):
    """Check the file out from git if needed.

    If ``file_path`` is prefixed with a git commit hash, e.g.
    ``7d986c96:../amsterdam-schema/datasets/bag/dataset.json``,
    fetch the file contents at that commit and write them to a temp file.
    Assumes the `git` binary is available on the system.
    """
    if ":" in file_path:
        # Split on the first colon only; the path part may contain more.
        git_hash, bare_file_path = file_path.split(":", 1)
        pl_path = Path(bare_file_path)
        result = subprocess.run(  # nosec
            ["git", "show", f"{git_hash}:./{pl_path.name}"],
            cwd=pl_path.parent,
            capture_output=True,
            check=True,  # fail loudly rather than writing an empty temp file
        )
        handle, tmp_path = tempfile.mkstemp()
        # The with-block closes the descriptor; no explicit close() needed.
        with os.fdopen(handle, "wb") as fp:
            fp.write(result.stdout)
        return tmp_path

    return file_path

def _load_table_version_from_file(
self, dataset_id: str, table_id: str, file_path: str
) -> DatasetTableSchema:
dataset = self.loader.get_dataset_from_file(file_path, allow_external_files=True)
assert dataset.id == dataset_id, f"The id in '{file_path}' does not match '{dataset_id}'"
return dataset.get_table_by_id(table_id)

def _load_dependencies(self, dataset: DatasetSchema) -> list[str]:
"""Make sure any dependencies are loaded.
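For reference, a minimal standalone sketch (not part of the commit) of the `git show` trick that `_checkout_file_if_needed` relies on; the helper name `file_at_commit` is invented here, and the `git` binary is assumed to be on PATH:

import subprocess
import tempfile
from pathlib import Path

def file_at_commit(spec: str) -> str:
    """Resolve "<hash>:<path>" to a temp file holding the committed contents."""
    git_hash, raw_path = spec.split(":", 1)
    path = Path(raw_path)
    # `git show <hash>:./<name>` resolves <name> relative to git's working
    # directory, so run git from the file's own directory.
    blob = subprocess.run(
        ["git", "show", f"{git_hash}:./{path.name}"],
        cwd=path.parent,
        capture_output=True,
        check=True,
    ).stdout
    with tempfile.NamedTemporaryFile(delete=False, suffix=path.suffix) as fp:
        fp.write(blob)
    return fp.name

# e.g. file_at_commit("7d986c96:../amsterdam-schema/datasets/bag/dataset.json")
# returns a temp-file path whose contents are dataset.json as of commit 7d986c96.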
6 changes: 4 additions & 2 deletions src/schematools/loaders.py
@@ -365,7 +365,9 @@ def get_root(cls, dataset_file: Path | str) -> Path:
except StopIteration:
raise ValueError(f"No 'datasets' root found for file '{dataset_file}'.")

def get_dataset_from_file(
self, dataset_file: Path | str, prefetch_related: bool = False, allow_external_files: bool = False
):
"""Extra method, to read a dataset directly from a JSON file.
This is mainly a helper function for testing.
@@ -394,7 +396,7 @@ def get_dataset_from_file(self, dataset_file: Path | str, prefetch_related: bool
dataset_file = self.root.joinpath(dataset_file)
dataset_file = dataset_file.resolve() # removes ../../ entries, so is_relative_to() works

if not allow_external_files and not dataset_file.is_relative_to(self.root):
raise ValueError(
f"Dataset file '{dataset_file}' does not exist in the schema repository"
)
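
A quick sketch (not part of the commit; the paths are invented) of why the loader resolves the path before this check: `Path.is_relative_to()` compares components lexically, so `../` segments slip past it until `resolve()` removes them (Python 3.9+):

from pathlib import Path

root = Path("/repo/datasets")
candidate = root / "../elsewhere/dataset.json"
print(candidate.is_relative_to(root))            # True: the lexical prefix still matches
print(candidate.resolve().is_relative_to(root))  # False: "../" resolved away, file is outside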
