Add possibility to get SQL migrations based on git commit hashes
jjmurre committed Dec 1, 2023
1 parent 765afdb commit 2c786ad
Showing 5 changed files with 79 additions and 8 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
@@ -1,3 +1,8 @@
# 2023-12-01 (5.18.0)

* Add possibility to use git commit hashes when creating SQL migrations
from amsterdam schema table definitions.

# 2023-11-24 (5.17.18)

* Bugfix: Update nested table when nested field name has underscore.
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -25,5 +25,7 @@ exclude = '''
| dist
)/
'''
[tool.bandit]
skips = ["B101", "B404"]

github_url = "https://github.com/Amsterdam/schema-tools"
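(Context: B101 is bandit's `assert_used` check and B404 flags importing `subprocess`; skipping them presumably accommodates the `assert` statements and the `subprocess` usage this commit introduces.)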
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = amsterdam-schema-tools
version = 5.18.0
url = https://github.com/amsterdam/schema-tools
license = Mozilla Public 2.0
author = Team Data Diensten, van het Dataplatform onder de Directie Digitale Voorzieningen (Gemeente Amsterdam)
@@ -1,6 +1,10 @@
from __future__ import annotations

import os
import subprocess
import tempfile
from collections import deque
from pathlib import Path

from django.apps import apps
from django.conf import settings
@@ -27,7 +31,14 @@ class Command(BaseCommand):
./manage.py sqlmigrate_schema -v3 meetbouten meetbouten v1.0.0 v1.1.0
or, using the schemas from the local filesystem and getting the
older version of a schema from a git commit hash:

./manage.py sqlmigrate_schema -v3 meetbouten meetbouten \
    7d986c96:../amsterdam-schema/datasets/meetbouten/dataset.json \
    ../amsterdam-schema/datasets/meetbouten/dataset.json \
    --from-files

The command is sped up by pointing ``SCHEMA_URL`` or ``--schema-url``
to a local filesystem repository of the schema files. Otherwise it downloads
the current schemas from the default remote repository.
"""
@@ -46,15 +57,24 @@ def add_arguments(self, parser: CommandParser) -> None:
default=DEFAULT_DB_ALIAS,
help='Nominates a database to create SQL for. Defaults to the "default" database.',
)
parser.add_argument(
"--from-files",
action="store_true",
help="Get the tables from a file. NB. the SCHEMA_URL also needs to be file-based!",
)
parser.add_argument("schema", help="Schema name")
parser.add_argument("table", help="Table name")
# Currently, both the old and the new version need to be given.
# There is no way yet to retrieve a listing of available table versions.
parser.add_argument(
"version1", metavar="OLDVERSION", help="Old table version, e.g. v1.0.0"
"version1",
metavar="OLDVERSION",
help="Old table version, e.g. v1.0.0, or `path-to-dataset-json` with --from-files",
)
parser.add_argument(
"version2", metavar="NEWVERSION", help="New table version, e.g. v1.1.0"
"version2",
metavar="NEWVERSION",
help="New table version, e.g. v1.1.0, , or `path-to-dataset-json` with --from-files",
)

def handle(self, *args, **options) -> None:
@@ -67,8 +87,19 @@ def handle(self, *args, **options) -> None:

# Load the data from the schema repository
dataset = self._load_dataset(options["schema"])
if options["from_files"]:
assert not options["schema_url"].startswith(
"http"
), "--from-files only works with a SCHEMA_URL on the local filesystem."
table1 = self._load_table_version_from_file(
dataset.id, options["table"], self._checkout_file_if_needed(options["version1"])
)
table2 = self._load_table_version_from_file(
dataset.id, options["table"], self._checkout_file_if_needed(options["version2"])
)
else:
table1 = self._load_table_version(dataset, options["table"], options["version1"])
table2 = self._load_table_version(dataset, options["table"], options["version2"])
real_apps = self._load_dependencies(dataset)
dummy_dataset = self._get_dummy_dataset_model(dataset)

@@ -129,6 +160,37 @@ def _load_table_version(

raise CommandError(f"Table version '{table_id}/{version}' does not exist.") from e

def _checkout_file_if_needed(self, file_path):
    """Check the file out from git if needed.

    If ``file_path`` is prefixed with a git commit hash, e.g.
    ``7d986c96:../amsterdam-schema/datasets/bag/dataset.json``,
    fetch the file contents at that commit and write them to a temp file.
    Assumes the `git` binary is available on the system.
    """
    if ":" in file_path:
        # Split on the first colon only; the path part may contain more.
        git_hash, bare_file_path = file_path.split(":", 1)
        pl_path = Path(bare_file_path)
        result = subprocess.run(  # nosec
            ["git", "show", f"{git_hash}:./{pl_path.name}"],
            cwd=pl_path.parent,
            capture_output=True,
            check=True,  # fail loudly rather than writing an empty temp file
        )
        handle, tmp_path = tempfile.mkstemp()
        # The with-block closes the descriptor; no explicit close() needed.
        with os.fdopen(handle, "wb") as fp:
            fp.write(result.stdout)
        return tmp_path

    return file_path

def _load_table_version_from_file(
self, dataset_id: str, table_id: str, file_path: str
) -> DatasetTableSchema:
dataset = self.loader.get_dataset_from_file(file_path, allow_external_files=True)
assert dataset.id == dataset_id, f"The id in '{file_path}' does not match '{dataset_id}'"
return dataset.get_table_by_id(table_id)

def _load_dependencies(self, dataset: DatasetSchema) -> list[str]:
"""Make sure any dependencies are loaded.
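For reference, a minimal standalone sketch (not part of the commit) of the `git show` trick that `_checkout_file_if_needed` relies on; the helper name `file_at_commit` is invented here, and the `git` binary is assumed to be on PATH:

import subprocess
import tempfile
from pathlib import Path

def file_at_commit(spec: str) -> str:
    """Resolve "<hash>:<path>" to a temp file holding the committed contents."""
    git_hash, raw_path = spec.split(":", 1)
    path = Path(raw_path)
    # `git show <hash>:./<name>` resolves <name> relative to git's working
    # directory, so run git from the file's own directory.
    blob = subprocess.run(
        ["git", "show", f"{git_hash}:./{path.name}"],
        cwd=path.parent,
        capture_output=True,
        check=True,
    ).stdout
    with tempfile.NamedTemporaryFile(delete=False, suffix=path.suffix) as fp:
        fp.write(blob)
    return fp.name

# e.g. file_at_commit("7d986c96:../amsterdam-schema/datasets/bag/dataset.json")
# returns a temp-file path whose contents are dataset.json as of commit 7d986c96.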
6 changes: 4 additions & 2 deletions src/schematools/loaders.py
@@ -365,7 +365,9 @@ def get_root(cls, dataset_file: Path | str) -> Path:
except StopIteration:
raise ValueError(f"No 'datasets' root found for file '{dataset_file}'.")

def get_dataset_from_file(
self, dataset_file: Path | str, prefetch_related: bool = False, allow_external_files: bool = False
):
"""Extra method, to read a dataset directly from a JSON file.
This is mainly a helper function for testing.
@@ -394,7 +396,7 @@ def get_dataset_from_file(self, dataset_file: Path | str, prefetch_related: bool
dataset_file = self.root.joinpath(dataset_file)
dataset_file = dataset_file.resolve() # removes ../../ entries, so is_relative_to() works

if not allow_external_files and not dataset_file.is_relative_to(self.root):
raise ValueError(
f"Dataset file '{dataset_file}' does not exist in the schema repository"
)
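
A quick sketch (not part of the commit; the paths are invented) of why the loader resolves the path before this check: `Path.is_relative_to()` compares components lexically, so `../` segments slip past it until `resolve()` removes them (Python 3.9+):

from pathlib import Path

root = Path("/repo/datasets")
candidate = root / "../elsewhere/dataset.json"
print(candidate.is_relative_to(root))            # True: the lexical prefix still matches
print(candidate.resolve().is_relative_to(root))  # False: "../" resolved away, file is outside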
