diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 97d9b3af2..17e0c53f6 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -10,9 +10,10 @@ runs: - name: Install python dependencies shell: bash run: | + # - install pip libs + # note: gdal requires the extra args cd python pip install build wheel pyspark==${{ matrix.spark }} numpy==${{ matrix.numpy }} - pip install numpy==${{ matrix.numpy }} pip install --no-build-isolation --no-cache-dir --force-reinstall gdal==${{ matrix.gdal }} pip install . - name: Test and build python package diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index a25a35f26..b33c1b453 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -25,17 +25,16 @@ runs: sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc)-security main multiverse restricted universe" sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc) main multiverse restricted universe" sudo apt-get update -y - # - install numpy first - pip install --upgrade pip - pip install 'numpy>=${{ matrix.numpy }}' # - install natives sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 - sudo apt-get install -y gdal-bin libgdal-dev python3-gdal - # - install gdal with numpy - pip install --no-cache-dir --force-reinstall 'GDAL[numpy]==${{ matrix.gdal }}' - sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so - sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so.30 - #sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 + sudo apt-get install -y gdal-bin libgdal-dev python3-numpy python3-gdal + # - install pip libs + pip install --upgrade pip + 
pip install gdal==${{ matrix.gdal }} + # - add the so files + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 - name: Test and build the scala JAR - skip tests is false if: inputs.skip_tests == 'false' shell: bash diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index 7380897b7..4f0c676cd 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -17,7 +17,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.21.5 ] + numpy: [ 1.22.4 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] @@ -28,7 +28,7 @@ jobs: uses: ./.github/actions/scala_build - name: build python uses: ./.github/actions/python_build - - name: build R - uses: ./.github/actions/r_build + # - name: build R + # uses: ./.github/actions/r_build - name: upload artefacts uses: ./.github/actions/upload_artefacts diff --git a/.github/workflows/build_python.yml b/.github/workflows/build_python.yml index fd2024669..30d62cb3c 100644 --- a/.github/workflows/build_python.yml +++ b/.github/workflows/build_python.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.21.5 ] + numpy: [ 1.22.4 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/build_r.yml b/.github/workflows/build_r.yml index c0953eb66..986ca744d 100644 --- a/.github/workflows/build_r.yml +++ b/.github/workflows/build_r.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.21.5 ] + numpy: [ 1.22.4 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/build_scala.yml b/.github/workflows/build_scala.yml index 9505b71e6..0269130d8 100644 --- 
a/.github/workflows/build_scala.yml +++ b/.github/workflows/build_scala.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.21.5 ] + numpy: [ 1.22.4 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index c844c44a7..251ead879 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.21.5 ] + numpy: [ 1.22.4 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/CHANGELOG.md b/CHANGELOG.md index eff0a47fe..dbaa72653 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,12 @@ -## v0.3.14 +## v0.4.0 [DBR 13.3 LTS] +- First release for DBR 13.3 LTS which is Ubuntu Jammy and Spark 3.4.1. Not backwards compatible, meaning it will not run on prior DBRs; requires either a Photon DBR or a ML Runtime (__Standard, non-Photon DBR no longer allowed__). +- New `setup_fuse_install` function to meet various requirements arising with Unity Catalog + Shared Access clusters; removed the scala equivalent function, making artifact setup and install python-first for scala and Spark SQL. +- Removed OSS ESRI Geometry API for 0.4 series, JTS now the only vector provider. +- MosaicAnalyzer functions now accept Spark DataFrames instead of MosaicFrame, which has been removed. +- Docs for 0.3.x have been archived and linked from current docs; notebooks for 0.3.x have been separated from current notebooks. +- This release targets Assigned (vs Shared Access) clusters and offers python and scala language bindings; SQL expressions will not register in this release within Unity Catalog. + +## v0.3.14 [DBR < 13] - Fixes for Warning and Error messages on mosaic_enable call. - Performance improvements for raster functions. - Fix support for GDAL configuration via spark config (use 'spark.databricks.labs.mosaic.gdal.' prefix). 
diff --git a/R/generate_R_bindings.R b/R/generate_R_bindings.R index 093d68e95..f60199efb 100644 --- a/R/generate_R_bindings.R +++ b/R/generate_R_bindings.R @@ -52,7 +52,7 @@ build_column_specifiers <- function(input){ build_method<-function(input){ function_name <- input$function_name arg_names <- lapply(input$args, function(x){c(x[1])}) - #this handles converting non-Column arguments to their R equivalents + # this handles converting non-Column arguments to their R equivalents argument_parser <- function(x){ if(x[2] == 'Int'){ x[2] <- "numeric" diff --git a/README.md b/README.md index 698c4d4b4..7551bb864 100644 --- a/README.md +++ b/README.md @@ -32,26 +32,53 @@ The supported languages are Scala, Python, R, and SQL. ## How does it work? -The Mosaic library is written in Scala to guarantee maximum performance with Spark and when possible, it uses code generation to give an extra performance boost. - -The other supported languages (Python, R and SQL) are thin wrappers around the Scala code. +The Mosaic library is written in Scala (JVM) to guarantee maximum performance with Spark and when possible, it uses code generation to give an extra performance boost. +__The other supported languages (Python, R and SQL) are thin wrappers around the Scala (JVM) code.__ ![mosaic-logical-design](src/main/resources/MosaicLogicalDesign.png) Image1: Mosaic logical design. ## Getting started -We recommend using Databricks Runtime versions 11.3 LTS or 12.2 LTS with Photon enabled; this will leverage the -Databricks H3 expressions when using H3 grid system. +### Mosaic 0.4.x Series [Latest] + +We recommend using Databricks Runtime versions 13.3 LTS with Photon enabled. + +:warning: **Mosaic 0.4.x series only supports DBR 13**. If running on a different DBR it will throw an exception: + +> DEPRECATION ERROR: Mosaic v0.4.x series only supports Databricks Runtime 13. You can specify `%pip install 'databricks-mosaic<0.4,>=0.3'` for DBR < 13.
+ +As of the 0.4.0 release, Mosaic issues the following ERROR when initialized on a cluster that is neither Photon Runtime nor Databricks Runtime ML [[ADB](https://learn.microsoft.com/en-us/azure/databricks/runtime/) | [AWS](https://docs.databricks.com/runtime/index.html) | [GCP](https://docs.gcp.databricks.com/runtime/index.html)]: + +> DEPRECATION ERROR: Please use a Databricks Photon-enabled Runtime for performance benefits or Runtime ML for spatial AI benefits; Mosaic 0.4.x series restricts executing this cluster. + +__Language Bindings__ + +As of Mosaic 0.4.0 (subject to change in follow-on releases)... -:warning: **Mosaic 0.3 series does not support DBR 13** (coming soon); also, DBR 10 is no longer supported in Mosaic. +* _Mosaic SQL expressions cannot yet be registered with [Unity Catalog](https://www.databricks.com/product/unity-catalog) due to API changes affecting DBRs >= 13._ +* [Assigned Clusters](https://docs.databricks.com/en/compute/configure.html#access-modes): Mosaic Python, R, and Scala APIs. +* [Shared Access Clusters](https://docs.databricks.com/en/compute/configure.html#access-modes): Mosaic Scala API (JVM) with Admin [allowlisting](https://docs.databricks.com/en/data-governance/unity-catalog/manage-privileges/allowlist.html); _Python bindings to Mosaic Scala APIs are blocked by Py4J Security on Shared Access Clusters._ -As of the 0.3.11 release, Mosaic issues the following warning when initialized on a cluster that is neither Photon Runtime nor Databricks Runtime ML [[ADB](https://learn.microsoft.com/en-us/azure/databricks/runtime/) | [AWS](https://docs.databricks.com/runtime/index.html) | [GCP](https://docs.gcp.databricks.com/runtime/index.html)]: +__Additional Notes:__ -> DEPRECATION WARNING: Mosaic is not supported on the selected Databricks Runtime. Mosaic will stop working on this cluster after v0.3.x. Please use a Databricks Photon-enabled Runtime (for performance benefits) or Runtime ML (for spatial AI benefits).
+As of Mosaic 0.4.0 (subject to change in follow-on releases)... -If you are receiving this warning in v0.3.11+, you will want to begin to plan for a supported runtime. The reason we are making this change is that we are streamlining Mosaic internals to be more aligned with future product APIs which are powered by Photon. Along this direction of change, Mosaic will be standardizing to JTS as its default and supported Vector Geometry Provider. +1. [Unity Catalog](https://www.databricks.com/product/unity-catalog): Enforces process isolation which is difficult to accomplish with custom JVM libraries; as such only built-in (aka platform provided) JVM APIs can be invoked from other supported languages in Shared Access Clusters. +2. [Volumes](https://docs.databricks.com/en/connect/unity-catalog/volumes.html): Along the same principle of isolation, clusters (both assigned and shared access) can read Volumes via relevant built-in readers and writers or via custom python calls which do not involve any custom JVM code. + +### Mosaic 0.3.x Series + +We recommend using Databricks Runtime versions 12.2 LTS with Photon enabled. + +:warning: **Mosaic 0.3.x series does not support DBR 13**. + +As of the 0.3.11 release, Mosaic issues the following WARNING when initialized on a cluster that is neither Photon Runtime nor Databricks Runtime ML [[ADB](https://learn.microsoft.com/en-us/azure/databricks/runtime/) | [AWS](https://docs.databricks.com/runtime/index.html) | [GCP](https://docs.gcp.databricks.com/runtime/index.html)]: + +> DEPRECATION WARNING: Please use a Databricks Photon-enabled Runtime for performance benefits or Runtime ML for spatial AI benefits; Mosaic will stop working on this cluster after v0.3.x. + +If you are receiving this warning in v0.3.11+, you will want to begin to plan for a supported runtime. The reason we are making this change is that we are streamlining Mosaic internals to be more aligned with future product APIs which are powered by Photon. 
Along this direction of change, Mosaic has standardized to JTS as its default and supported Vector Geometry Provider. ### Documentation @@ -114,21 +141,24 @@ import com.databricks.labs.mosaic.JTS val mosaicContext = MosaicContext.build(H3, JTS) mosaicContext.register(spark) ``` - +__Note: Mosaic 0.4.x SQL bindings for DBR 13 not yet available in Unity Catalog due to API changes.__ ## Examples +Here are some example notebooks, check the language links for latest [[Python](/notebooks/examples/python/) | [Scala](/notebooks/examples/scala/) | [SQL](/notebooks/examples/sql/) | [R](/notebooks/examples/R/)]: + | Example | Description | Links | | --- | --- | --- | -| __Quick Start__ | Example of performing spatial point-in-polygon joins on the NYC Taxi dataset | [python](/notebooks/examples/python/QuickstartNotebook.py), [scala](notebooks/examples/scala/QuickstartNotebook.scala), [R](notebooks/examples/R/QuickstartNotebook.r), [SQL](notebooks/examples/sql/QuickstartNotebook.sql) | +| __Quick Start__ | Example of performing spatial point-in-polygon joins on the NYC Taxi dataset | [python](/notebooks/examples/python/QuickstartNotebook.ipynb), [scala](notebooks/examples/scala/QuickstartNotebook.ipynb), [R](notebooks/examples/R/QuickstartNotebook.r), [SQL](notebooks/examples/sql/QuickstartNotebook.ipynb) | +| Shapefiles | Examples of reading multiple shapefiles | [python](notebooks/examples/python/Shapefiles/) | | Spatial KNN | Runnable notebook-based example using Mosaic [SpatialKNN](https://databrickslabs.github.io/mosaic/models/spatial-knn.html) model | [python](notebooks/examples/python/SpatialKNN) | -| Open Street Maps | Ingesting and processing with Delta Live Tables the Open Street Maps dataset to extract buildings polygons and calculate aggregation statistics over H3 indexes | [python](notebooks/examples/python/OpenStreetMaps) | +| NetCDF | Read multiple NetCDFs, process through various data engineering steps before analyzing and rendering | 
[python](notebooks/examples/python/NetCDF/) | | STS Transfers | Detecting Ship-to-Ship transfers at scale by leveraging Mosaic to process AIS data. | [python](notebooks/examples/python/Ship2ShipTransfers), [blog](https://medium.com/@timo.roest/ship-to-ship-transfer-detection-b370dd9d43e8) | -You can import those examples in Databricks workspace using [these instructions](https://docs.databricks.com/notebooks/notebooks-manage.html#import-a-notebook). +You can import those examples in Databricks workspace using [these instructions](https://docs.databricks.com/en/notebooks/index.html). ## Ecosystem -Mosaic is intended to augment the existing system and unlock the potential by integrating spark, delta and 3rd party frameworks into the Lakehouse architecture. +Mosaic is intended to augment the existing system and unlock the potential by integrating [Spark](https://spark.apache.org/), [Delta Lake](https://delta.io/) and 3rd party frameworks into the Lakehouse architecture. ![mosaic-logo](src/main/resources/MosaicEcosystem.png) Image2: Mosaic ecosystem - Lakehouse integration. 
diff --git a/pom.xml b/pom.xml index f0759de70..8be751ab0 100644 --- a/pom.xml +++ b/pom.xml @@ -278,7 +278,7 @@ 2.12.10 2.12 3.4.0 - 0.3.14 + 0.4.0 diff --git a/python/mosaic/__init__.py b/python/mosaic/__init__.py index fd0d83624..23287839e 100644 --- a/python/mosaic/__init__.py +++ b/python/mosaic/__init__.py @@ -4,4 +4,4 @@ from .models import SpatialKNN from .readers import read -__version__ = "0.3.14" +__version__ = "0.4.0" diff --git a/python/mosaic/api/enable.py b/python/mosaic/api/enable.py index 3aea9ff7c..9cbc52136 100644 --- a/python/mosaic/api/enable.py +++ b/python/mosaic/api/enable.py @@ -10,7 +10,10 @@ from mosaic.utils.notebook_utils import NotebookUtils -def enable_mosaic(spark: SparkSession, dbutils=None) -> None: +def enable_mosaic( + spark: SparkSession, dbutils = None, log_info: bool = False, + jar_path: str = None, jar_autoattach: bool = True +) -> None: """ Enable Mosaic functions. @@ -22,9 +25,25 @@ def enable_mosaic(spark: SparkSession, dbutils=None) -> None: spark : pyspark.sql.SparkSession The active SparkSession. dbutils : dbruntime.dbutils.DBUtils - The dbutils object used for `display` and `displayHTML` functions. - Optional, only applicable to Databricks users. + Optional, specify dbutils object used for `display` and `displayHTML` functions. + log_info : bool + Logging cannot be adjusted with Unity Catalog Shared Access clusters; + if you try to do so, will throw a Py4JSecurityException. + - True will try to setLogLevel to 'info' + - False will not; Default is False + jar_path : str + Convenience when you need to change the JAR path for Unity Catalog + Volumes with Shared Access clusters + - Default is None; if provided, sets + "spark.databricks.labs.mosaic.jar.path" + jar_autoattach : bool + Convenience when you need to turn off JAR auto-attach for Unity + Catalog Volumes with Shared Access clusters. 
+ - False will not register the JAR; sets + "spark.databricks.labs.mosaic.jar.autoattach" to "false" + - True will register the JAR; Default is True + + Returns ------- @@ -34,7 +53,7 @@ def enable_mosaic(spark: SparkSession, dbutils=None) -> None: - `spark.databricks.labs.mosaic.jar.autoattach`: 'true' (default) or 'false' Automatically attach the Mosaic JAR to the Databricks cluster? (Optional) - - `spark.databricks.labs.mosaic.jar.location` + - `spark.databricks.labs.mosaic.jar.path` Explicitly specify the path to the Mosaic JAR. (Optional and not required at all in a standard Databricks environment). - `spark.databricks.labs.mosaic.geometry.api`: 'JTS' @@ -43,8 +62,20 @@ Explicitly specify the index system to use for optimized spatial joins. (Optional) """ + # Set spark session, conditionally: + # - set conf for jar autoattach + # - set conf for jar path + # - set log level to 'info' + if not jar_autoattach: + spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") + print("...set 'spark.databricks.labs.mosaic.jar.autoattach' to false") + if jar_path is not None: + spark.conf.set("spark.databricks.labs.mosaic.jar.path", jar_path) + print(f"...set 'spark.databricks.labs.mosaic.jar.path' to '{jar_path}'") + if log_info: + spark.sparkContext.setLogLevel('info') config.mosaic_spark = spark - _ = MosaicLibraryHandler(config.mosaic_spark) + _ = MosaicLibraryHandler(config.mosaic_spark, log_info = log_info) config.mosaic_context = MosaicContext(config.mosaic_spark) # Register SQL functions @@ -56,14 +87,8 @@ def enable_mosaic(spark: SparkSession, dbutils=None) -> None: isSupported = config.mosaic_context._context.checkDBR(spark._jsparkSession) if not isSupported: - print( - """ - DEPRECATION WARNING: - Please use a Databricks: - - Photon-enabled Runtime for performance benefits - - Runtime ML for spatial AI benefits - Mosaic will stop working on this cluster after v0.3.x.""" - ) + # unexpected
- checkDBR returns true or throws exception + print("""WARNING: checkDBR returned False.""") # Not yet added to the pyspark API with warnings.catch_warnings(): diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index eb39609a6..8f0cb3372 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -3,7 +3,6 @@ import os import pkg_resources import requests -import shutil __all__ = ["SetupMgr", "setup_fuse_install"] @@ -24,12 +23,9 @@ def get_install_mosaic_version() -> str: @dataclass class SetupMgr: - """ - Defaults mirror setup_gdal. - """ to_fuse_dir: str script_in_name: str = 'mosaic-gdal-init.sh' - script_out_name: str = 'mosaic-gdal-init.sh' + script_out_name: str = 'mosaic-fuse-init.sh' with_mosaic_pip: bool = False with_gdal: bool = True with_ubuntugis: bool = False @@ -37,18 +33,27 @@ class SetupMgr: jar_copy: bool = False jni_so_copy: bool = False - def configure(self) -> None: + def configure(self) -> bool: """ Handle various config options. - if `with_mosaic_pip` or `with_gdal` or `with_ubuntugis`, script will be configured and written. + Returns True unless resources fail to download. 
""" # - set the mosaic and github versions # will be used in downloading resources # may be used in pip install mosaic_version = get_install_mosaic_version() github_version = mosaic_version # <- valid or None + pip_str = '' + release_version = None + if ( + self.override_mosaic_version is not None and + self.override_mosaic_version == 'main' + ): + github_version = 'main' + elif ( self.override_mosaic_version is not None and set(self.override_mosaic_version).issubset(set('=0123456789.')) ): @@ -56,12 +61,10 @@ def configure(self) -> None: elif mosaic_version is None: github_version = 'main' - # TODOS AFTER PR MERGED: - # [1] CHANGE URL TO ACTUAL MOSAIC (not 'mjohns-databricks'): - # 'https://raw.githubusercontent.com/databrickslabs/mosaic' - # [2] USE f'{GITHUB_CONTENT_URL_BASE}/{github_version}' (not 'gdal-jammy-1') - GITHUB_CONTENT_URL_BASE = 'https://raw.githubusercontent.com/mjohns-databricks/mosaic' - GITHUB_CONTENT_TAG_URL = f'{GITHUB_CONTENT_URL_BASE}/gdal-jammy-1' + GITHUB_CONTENT_URL_BASE = 'https://raw.githubusercontent.com/databrickslabs/mosaic' + GITHUB_CONTENT_TAG_URL = f'{GITHUB_CONTENT_URL_BASE}/v_{github_version}' + if github_version == 'main': + GITHUB_CONTENT_TAG_URL = f'{GITHUB_CONTENT_URL_BASE}/main' # - generate fuse dir path os.makedirs(self.to_fuse_dir, exist_ok=True) @@ -71,10 +74,12 @@ def configure(self) -> None: if with_script: # - start with the unconfigured script script_url = f'{GITHUB_CONTENT_TAG_URL}/scripts/{self.script_in_name}' - script = requests.get(script_url, allow_redirects=True).text + script = None + with requests.Session() as s: + script = s.get(script_url, allow_redirects=True).text # - tokens used in script - SCRIPT_FUSE_DIR_TOKEN= "FUSE_DIR='__FUSE_DIR__'" # <- ' added + SCRIPT_FUSE_DIR_TOKEN= "FUSE_DIR='__FUSE_DIR__'" # <- ' added SCRIPT_GITHUB_VERSION_TOKEN = 'GITHUB_VERSION=__GITHUB_VERSION__' SCRIPT_MOSAIC_PIP_VERSION_TOKEN = "MOSAIC_PIP_VERSION='__MOSAIC_PIP_VERSION__'" # <- ' added SCRIPT_WITH_MOSAIC_TOKEN = 
'WITH_MOSAIC=0' @@ -119,17 +124,21 @@ def configure(self) -> None: ) # - set the mosaic version for pip - pip_str='' - if self.override_mosaic_version is not None: + if ( + self.override_mosaic_version is not None and + not self.override_mosaic_version == 'main' + ): pip_str = f'=={self.override_mosaic_version}' - if any(c in self.override_mosaic_version for c in ['=','<','<']): - pip_str = self.override_mosaic_version + if any(c in self.override_mosaic_version for c in ['=','>','<']): + pip_str = f"""{self.override_mosaic_version.replace("'","").replace('"','')}""" + else: + pip_str = f"=={self.override_mosaic_version}" elif mosaic_version is not None: - pip_str = f'=={mosaic_version}' + pip_str = f"=={mosaic_version}" script = script.replace( SCRIPT_MOSAIC_PIP_VERSION_TOKEN, SCRIPT_MOSAIC_PIP_VERSION_TOKEN.replace( - '__MOSAIC_PIP_VERSION__', pip_str) + "__MOSAIC_PIP_VERSION__", pip_str) ) # - write the configured init script @@ -139,35 +148,50 @@ def configure(self) -> None: # --- end of script config --- with_resources = self.jar_copy or self.jni_so_copy - if with_resources: + resource_statuses = {} + if with_resources: + CHUNK_SIZE = 1024 * 1024 * 64 # 64MB # - handle jar copy if self.jar_copy: # url and version details GITHUB_RELEASE_URL_BASE = 'https://github.com/databrickslabs/mosaic/releases' resource_version = github_version - if github_version is None: - latest = str(requests.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) + if github_version == 'main': + latest = None + with requests.Session() as s: + latest = str(s.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) resource_version = latest.split("/tag/v_")[1].split('"')[0] - - # download jar + # download jar jar_filename = f'mosaic-{resource_version}-jar-with-dependencies.jar' - jar_url = f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}' - jar_request = requests.get(jar_url, allow_redirects=True, stream=True) - with 
open(f'{self.to_fuse_dir}/{jar_filename}', 'wb') as jar_file: - shutil.copyfileobj(jar_request.raw, jar_file) - - # - handle so copy + jar_path = f'{self.to_fuse_dir}/{jar_filename}' + with requests.Session() as s: + r = s.get( + f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}', + stream=True + ) + with open(jar_path, 'wb') as f: + for ch in r.iter_content(chunk_size=CHUNK_SIZE): + f.write(ch) + resource_statuses[jar_filename] = r.status_code + # - handle so copy if self.jni_so_copy: - for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: - so_url = f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}' - so_request = requests.get(so_url, allow_redirects=True, stream=True) - with open(f'{self.to_fuse_dir}/{so_filename}', 'wb') as so_file: - shutil.copyfileobj(so_request.raw, so_file) - + with requests.Session() as s: + for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: + so_path = f'{self.to_fuse_dir}/{so_filename}' + r = s.get( + f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}', + stream=True + ) + with open(so_path, 'wb') as f: + for ch in r.iter_content(chunk_size=CHUNK_SIZE): + f.write(ch) + resource_statuses[so_filename] = r.status_code + # - echo status print(f"::: Install setup complete :::") print(f"- Settings: 'with_mosaic_pip'? {self.with_mosaic_pip}, 'with_gdal'? {self.with_gdal}, 'with_ubuntugis'? {self.with_ubuntugis}") - print(f" 'override_mosaic_version'? {self.override_mosaic_version}, 'jar_copy'? {self.jar_copy}, 'jni_so_copy'? {self.jni_so_copy}") + print(f" 'jar_copy'? {self.jar_copy}, 'jni_so_copy'? {self.jni_so_copy}, 'override_mosaic_version'? {self.override_mosaic_version}") + print(f"- Derived: 'mosaic_version'? {mosaic_version}, 'github_version'? {github_version}, 'release_version'? {release_version}, 'pip_str'? 
{pip_str}") print(f"- Fuse Dir: '{self.to_fuse_dir}'") if with_script: print(f"- Init Script: configured and stored at '{self.script_out_name}'; ", end='') @@ -175,13 +199,22 @@ def configure(self) -> None: print(f" more at https://docs.databricks.com/en/init-scripts/cluster-scoped.html") if with_resources: print(f"- Resource(s): copied") + print(resource_statuses) print("\n") + + if ( + not any(resource_statuses) or + all(value == 200 for value in resource_statuses.values()) + ): + return True + else: + return False def setup_fuse_install( to_fuse_dir: str, with_mosaic_pip: bool, with_gdal: bool, with_ubuntugis: bool = False, script_out_name: str = 'mosaic-fuse-init.sh', override_mosaic_version: str = None, jar_copy: bool = True, jni_so_copy: bool = True -) -> None: +) -> bool: """ [1] Copies Mosaic "fat" JAR (with dependencies) into `to_fuse_dir` - by default, version will match the current mosaic version executing the command, @@ -233,7 +266,7 @@ def setup_fuse_install( jni_so_copy: bool Whether to copy the GDAL JNI shared objects; default is True. - Returns + Returns True unless resources fail to download. ------- """ setup_mgr = SetupMgr( @@ -246,4 +279,4 @@ def setup_fuse_install( jar_copy = jar_copy, jni_so_copy = jni_so_copy ) - setup_mgr.configure() + return setup_mgr.configure() diff --git a/python/mosaic/api/gdal.py b/python/mosaic/api/gdal.py index 024002e89..9a44c446b 100644 --- a/python/mosaic/api/gdal.py +++ b/python/mosaic/api/gdal.py @@ -7,11 +7,11 @@ def setup_gdal( - to_fuse_dir: str = '/dbfs/FileStore/geospatial/mosaic/gdal/jammy', + to_fuse_dir: str = '/Workspace/Shared/geospatial/mosaic/gdal/jammy', with_mosaic_pip: bool = False, with_ubuntugis: bool = False, script_out_name: str = 'mosaic-gdal-init.sh', override_mosaic_version: str = None -) -> None: +) -> bool: """ Prepare GDAL init script and shared objects required for GDAL to run on spark. This function will generate the init script that will install GDAL on each worker node. 
@@ -28,7 +28,7 @@ def setup_gdal( ---------- to_fuse_dir : str Path to write out the init script for GDAL installation; - default is '/dbfs/FileStore/geospatial/mosaic/gdal/jammy'. + default is '/Workspace/Shared/geospatial/mosaic/gdal/jammy'. with_mosaic_pip : bool Whether to configure a script that pip installs databricks-mosaic, fixed to the current version; default is False. @@ -43,7 +43,7 @@ def setup_gdal( e.g. '==0.4.0' or '<0.5,>=0.4'; default is None. - Returns + Returns True unless resources fail to download. ------- """ setup_mgr = SetupMgr( @@ -53,7 +53,7 @@ def setup_gdal( script_out_name = script_out_name, override_mosaic_version = override_mosaic_version, ) - setup_mgr.configure() + return setup_mgr.configure() def enable_gdal(spark: SparkSession) -> None: diff --git a/python/mosaic/api/raster.py b/python/mosaic/api/raster.py index ffc29cebe..d27f669bc 100644 --- a/python/mosaic/api/raster.py +++ b/python/mosaic/api/raster.py @@ -15,6 +15,7 @@ "rst_combineavg", "rst_derivedband", "rst_frombands", + "rst_fromcontent", "rst_fromfile", "rst_georeference", "rst_getnodata", @@ -916,6 +917,21 @@ def rst_tessellate(raster: ColumnOrName, resolution: ColumnOrName) -> Column: ) +def rst_fromcontent(raster: ColumnOrName, driver: ColumnOrName, sizeInMB: ColumnOrName) -> Column: + """ + Tiles the raster binary into tiles of the given size. + :param raster: + :param driver: + :param sizeInMB: + :return: + """ + + return config.mosaic_context.invoke_function( + "rst_fromcontent", pyspark_to_java_column(raster), pyspark_to_java_column(driver), + pyspark_to_java_column(sizeInMB) + ) + + def rst_fromfile(raster: ColumnOrName, sizeInMB: ColumnOrName) -> Column: """ Tiles the raster into tiles of the given size. 
diff --git a/python/mosaic/config/config.py b/python/mosaic/config/config.py index bc5f80c9f..a59979be6 100644 --- a/python/mosaic/config/config.py +++ b/python/mosaic/config/config.py @@ -10,4 +10,3 @@ display_handler: DisplayHandler ipython_hook: InteractiveShell notebook_utils = None -default_gdal_init_script_path: str = "/dbfs/FileStore/geospatial/mosaic/gdal/" diff --git a/python/mosaic/core/library_handler.py b/python/mosaic/core/library_handler.py index 90b7eb5de..6568fd042 100644 --- a/python/mosaic/core/library_handler.py +++ b/python/mosaic/core/library_handler.py @@ -11,30 +11,32 @@ class MosaicLibraryHandler: _jar_path = None _jar_filename = None _auto_attached_enabled = None - - def __init__(self, spark): + + def __init__(self, spark, log_info: bool = True): self.spark = spark self.sc = spark.sparkContext - self.sc.setLogLevel("info") - log4jLogger = self.sc._jvm.org.apache.log4j - LOGGER = log4jLogger.LogManager.getLogger(__class__.__name__) + LOGGER = None + if log_info: + log4jLogger = self.sc._jvm.org.apache.log4j + LOGGER = log4jLogger.LogManager.getLogger(__class__.__name__) if self.auto_attach_enabled: - LOGGER.info(f"Looking for Mosaic JAR at {self.mosaic_library_location}.") - if not os.path.exists(self.mosaic_library_location): + jar_path = self.mosaic_library_location + LOGGER and LOGGER.info(f"Looking for Mosaic JAR at {jar_path}.") + if not os.path.exists(jar_path): raise FileNotFoundError( - f"Mosaic JAR package {self._jar_filename} could not be located at {self.mosaic_library_location}." + f"Mosaic JAR package {self._jar_filename} could not be located at {jar_path}." 
) - LOGGER.info(f"Automatically attaching Mosaic JAR to cluster.") + LOGGER and LOGGER.info(f"Automatically attaching Mosaic JAR to cluster.") self.auto_attach() @property def auto_attach_enabled(self) -> bool: - if not self._auto_attached_enabled: + if self._auto_attached_enabled is None: try: result = ( self.spark.conf.get("spark.databricks.labs.mosaic.jar.autoattach") - == "true" + == 'true' ) except Py4JJavaError as e: result = True @@ -43,7 +45,7 @@ def auto_attach_enabled(self) -> bool: @property def mosaic_library_location(self): - if not self._jar_path: + if self._jar_path is None: try: self._jar_path = self.spark.conf.get( "spark.databricks.labs.mosaic.jar.path" @@ -81,7 +83,6 @@ def auto_attach(self): converters = self.sc._jvm.scala.collection.JavaConverters JarURI = JavaURI.create("file:" + self._jar_path) - dbr_version = self.spark.conf.get("spark.databricks.clusterUsageTags.sparkVersion").split("-")[0] try: # This will fix the exception when running on Databricks Runtime 13.x+ diff --git a/python/setup.cfg b/python/setup.cfg index e8d0eb2fe..01ca109a2 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -12,18 +12,16 @@ classifiers = Topic :: Scientific/Engineering :: GIS Programming Language :: Python Programming Language :: Python :: 3 - Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.10 [options] packages = find: python_requires = >=3.10.0 -setup_requires = - pyspark==3.4.0 - ipython>=7.22.0 - install_requires = keplergl==0.3.2 h3==3.7.0 + pyspark<3.5,>=3.4 + ipython>=7.22.0 [options.package_data] mosaic = diff --git a/python/test/test_display_handler.py b/python/test/test_display_handler.py index 89e69324c..ed1661f12 100644 --- a/python/test/test_display_handler.py +++ b/python/test/test_display_handler.py @@ -4,6 +4,9 @@ class TestDisplayHandler(MosaicTestCase): + def setUp(self) -> None: + return super().setUp() + def test_display(self): df = self.wkt_boroughs() poly_df = 
df.select(st_makepolygon(st_geomfromwkt("wkt")).alias("polygon_geom")) diff --git a/python/test/test_fuse_install.py b/python/test/test_fuse_install.py new file mode 100644 index 000000000..7c2208b18 --- /dev/null +++ b/python/test/test_fuse_install.py @@ -0,0 +1,54 @@ +from .utils import SparkTestCase, FuseInstaller + + +class TestFuseInstall(SparkTestCase): + def setUp(self) -> None: + return super().setUp() + + def test_setup_no_op(self): + installer = FuseInstaller(False, False, jar_copy=False, jni_so_copy=False) + try: + self.assertTrue(installer.do_op()) + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") + + self.assertEqual(len(installer.list_files()), 0) # <- nothing generated + + def test_setup_jar_only(self): + installer = FuseInstaller(False, False, jar_copy=True, jni_so_copy=False) + try: + self.assertTrue(installer.do_op()) + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") + + def test_setup_sh_pip_only(self): + installer = FuseInstaller(True, False, jar_copy=False, jni_so_copy=False) + try: + self.assertTrue(installer.do_op()) + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") + + self.assertEqual(len(installer.list_files()), 1) # <- just init script + + def test_setup_sh_gdal(self): + installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=False) + try: + self.assertTrue(installer.do_op()) + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") + + self.assertEqual(len(installer.list_files()), 1) # <- just init script + + def test_setup_sh_gdal_jni(self): + installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=True) + try: + self.assertTrue(installer.do_op()) + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") + + def test_setup_sh_all(self): + installer = FuseInstaller(True, True, jar_copy=True, jni_so_copy=True) + try: + 
self.assertTrue(installer.do_op()) + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") diff --git a/python/test/test_gdal_install.py b/python/test/test_gdal_install.py index 3d2125fb2..340b96841 100644 --- a/python/test/test_gdal_install.py +++ b/python/test/test_gdal_install.py @@ -2,19 +2,22 @@ class TestGDALInstall(SparkTestCase): + def setUp(self) -> None: + return super().setUp() + def test_setup_gdal(self): installer = GDALInstaller() try: - installer.copy_objects() + self.assertTrue(installer.do_op()) except Exception: self.fail("Copying objects with `setup_gdal()` raised an exception.") + + self.assertEqual(len(installer.list_files()), 1) # <- just init script try: installer_result = installer.run_init_script() + self.assertEqual(installer_result, 0) + gdalinfo_result = installer.test_gdalinfo() + self.assertEqual(gdalinfo_result, "GDAL 3.4.1, released 2021/12/27\n") except Exception: self.fail("Execution of GDAL init script raised an exception.") - - self.assertEqual(installer_result, 0) - - gdalinfo_result = installer.test_gdalinfo() - self.assertEqual(gdalinfo_result, "GDAL 3.4.1, released 2021/12/27\n") diff --git a/python/test/test_library_handler.py b/python/test/test_library_handler.py index 6b782dc41..8c275e8c2 100644 --- a/python/test/test_library_handler.py +++ b/python/test/test_library_handler.py @@ -5,6 +5,9 @@ class TestMosaicLibraryHandler(SparkTestCase): + def setUp(self) -> None: + return super().setUp() + def test_auto_attach_enabled(self): handler = MosaicLibraryHandler(self.spark) self.assertFalse(handler.auto_attach_enabled) diff --git a/python/test/test_mosaic.py b/python/test/test_mosaic.py index 163e8f7fc..149afd06c 100644 --- a/python/test/test_mosaic.py +++ b/python/test/test_mosaic.py @@ -5,6 +5,9 @@ class TestMosaicContext(SparkTestCase): + def setUp(self) -> None: + return super().setUp() + def test_invoke_function(self): _ = MosaicLibraryHandler(self.spark) context = 
MosaicContext(self.spark) diff --git a/python/test/test_raster_functions.py b/python/test/test_raster_functions.py index cfbcb39ee..f8e09b6f5 100644 --- a/python/test/test_raster_functions.py +++ b/python/test/test_raster_functions.py @@ -5,6 +5,9 @@ class TestRasterFunctions(MosaicTestCaseWithGDAL): + def setUp(self) -> None: + return super().setUp() + def test_read_raster(self): result = self.generate_singleband_raster_df().first() self.assertEqual(result.length, 1067862) diff --git a/python/test/test_vector_functions.py b/python/test/test_vector_functions.py index 2d189caba..67cfc3cf2 100644 --- a/python/test/test_vector_functions.py +++ b/python/test/test_vector_functions.py @@ -7,6 +7,9 @@ class TestVectorFunctions(MosaicTestCase): + def setUp(self) -> None: + return super().setUp() + def test_st_point(self): expected = [ "POINT (0 0)", diff --git a/python/test/utils/__init__.py b/python/test/utils/__init__.py index c46240505..cdcf84086 100644 --- a/python/test/utils/__init__.py +++ b/python/test/utils/__init__.py @@ -1,3 +1,4 @@ from .mosaic_test_case import * from .mosaic_test_case_with_gdal import * from .setup_gdal import GDALInstaller +from .setup_fuse import FuseInstaller diff --git a/python/test/utils/mosaic_test_case.py b/python/test/utils/mosaic_test_case.py index 44f5a81df..986f9dc79 100644 --- a/python/test/utils/mosaic_test_case.py +++ b/python/test/utils/mosaic_test_case.py @@ -9,10 +9,16 @@ class MosaicTestCase(SparkTestCase): + def setUp(self) -> None: + return super.setUp() + @classmethod def setUpClass(cls) -> None: super().setUpClass() api.enable_mosaic(cls.spark) + + def setUp(self) -> None: + return super().setUp() def generate_input_single_linestring(self) -> DataFrame: return self.spark.createDataFrame( diff --git a/python/test/utils/mosaic_test_case_with_gdal.py b/python/test/utils/mosaic_test_case_with_gdal.py index 233698bab..cbcf1aa13 100644 --- a/python/test/utils/mosaic_test_case_with_gdal.py +++ 
b/python/test/utils/mosaic_test_case_with_gdal.py @@ -6,6 +6,9 @@ class MosaicTestCaseWithGDAL(MosaicTestCase): + def setUp(self) -> None: + return super().setUp() + @classmethod def setUpClass(cls) -> None: super().setUpClass() diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py new file mode 100644 index 000000000..a10a4db66 --- /dev/null +++ b/python/test/utils/setup_fuse.py @@ -0,0 +1,49 @@ +from pkg_resources import working_set, Requirement +from test.context import api + +import os +import shutil +import subprocess +import tempfile + +class FuseInstaller: + def __init__( + self, with_mosaic_pip, with_gdal, + jar_copy = False, jni_so_copy = False + ): + self._site_packages = working_set.find(Requirement("keplergl")).location + self._temp_dir = tempfile.mkdtemp() + self.with_mosaic_pip = with_mosaic_pip + self.with_gdal = with_gdal + self.jar_copy = jar_copy + self.jni_so_copy = jni_so_copy + self.FUSE_INIT_SCRIPT_FILENAME = "mosaic-fuse-init.sh" + + def __del__(self): + shutil.rmtree(self._temp_dir) + + def do_op(self) -> bool: + return api.setup_fuse_install( + self._temp_dir, + self.with_mosaic_pip, + self.with_gdal, + jar_copy=self.jar_copy, + jni_so_copy=self.jni_so_copy, + override_mosaic_version="main", + script_out_name=self.FUSE_INIT_SCRIPT_FILENAME + ) + + def run_init_script(self) -> int: + fuse_install_script_target = os.path.join( + self._temp_dir, self.FUSE_INIT_SCRIPT_FILENAME + ) + os.chmod(fuse_install_script_target, mode=0x744) + result = subprocess.run( + [fuse_install_script_target], + stdout=subprocess.DEVNULL, + env=dict(os.environ, DATABRICKS_ROOT_VIRTUALENV_ENV=self._site_packages), + ) + return result.returncode + + def list_files(self) ->list[str]: + return os.listdir(self._temp_dir) diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py index 4bd577a89..097ee77e2 100644 --- a/python/test/utils/setup_gdal.py +++ b/python/test/utils/setup_gdal.py @@ -1,35 +1,42 @@ -import os -import 
tempfile -import subprocess from pkg_resources import working_set, Requirement - from test.context import api +import os +import shutil +import subprocess +import tempfile class GDALInstaller: def __init__(self): self._site_packages = working_set.find(Requirement("keplergl")).location - self._temp_dir = tempfile.TemporaryDirectory() + self._temp_dir = tempfile.mkdtemp() + self.GDAL_INIT_SCRIPT_FILENAME = "mosaic-gdal-init.sh" def __del__(self): - self._temp_dir.cleanup() + shutil.rmtree(self._temp_dir) - def copy_objects(self): - api.setup_gdal(self._temp_dir.name) + def do_op(self) -> bool: + return api.setup_gdal( + to_fuse_dir = self._temp_dir, + override_mosaic_version="main", + script_out_name=self.GDAL_INIT_SCRIPT_FILENAME + ) - def run_init_script(self): + def run_init_script(self) -> int: gdal_install_script_target = os.path.join( - self._temp_dir.name, "mosaic-gdal-init.sh" + self._temp_dir, self.GDAL_INIT_SCRIPT_FILENAME ) os.chmod(gdal_install_script_target, mode=0x744) result = subprocess.run( [gdal_install_script_target], - stdout=subprocess.PIPE, + stdout=subprocess.DEVNULL, env=dict(os.environ, DATABRICKS_ROOT_VIRTUALENV_ENV=self._site_packages), ) - print(result.stdout.decode()) return result.returncode + + def list_files(self) -> list[str]: + return os.listdir(self._temp_dir) - def test_gdalinfo(self): + def test_gdalinfo(self) -> str: result = subprocess.run(["gdalinfo", "--version"], stdout=subprocess.PIPE) return result.stdout.decode() diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index 2c92b5758..98d2743bf 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -1,31 +1,33 @@ -import unittest -import os from importlib.metadata import version - from pyspark.sql import SparkSession import mosaic - +import os +import unittest class SparkTestCase(unittest.TestCase): spark = None library_location = None - + log4jref = None + @classmethod def setUpClass(cls) -> None: 
cls.library_location = f"{mosaic.__path__[0]}/lib/mosaic-{version('databricks-mosaic')}-jar-with-dependencies.jar" if not os.path.exists(cls.library_location): cls.library_location = f"{mosaic.__path__[0]}/lib/mosaic-{version('databricks-mosaic')}-SNAPSHOT-jar-with-dependencies.jar" - cls.spark = ( SparkSession.builder.master("local") .config("spark.jars", cls.library_location) + .config("spark.driver.extraJavaOptions", "-Dorg.apache.logging.log4j.level=FATAL") + .config("spark.executor.extraJavaOptions", "-Dorg.apache.logging.log4j.level=FATAL") .getOrCreate() ) cls.spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") - cls.spark.sparkContext.setLogLevel("WARN") - + cls.spark.sparkContext.setLogLevel("FATAL") + @classmethod def tearDownClass(cls) -> None: - cls.spark.sparkContext.setLogLevel("warn") cls.spark.stop() + + def setUp(self) -> None: + self.spark.sparkContext.setLogLevel("FATAL") diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index 27f8a2b0c..950b0ffe9 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -11,7 +11,7 @@ # - setup_gdal(...) # [4] this script has conditional logic based on variables # Author: Michael Johns | mjohns@databricks.com -# Last Modified: 20 NOV, 2023 +# Last Modified: 05 JAN, 2024 # TEMPLATE-BASED REPLACEMENT # - can also be manually specified @@ -26,16 +26,7 @@ WITH_UBUNTUGIS=0 # <- use ubuntugis ppa? 
WITH_FUSE_SO=0 # <- use fuse dir shared objects (vs wget) # SPECIFIED VERSIONS -# - may be changed by conditional logic -GDAL_VERSION=3.4.1 # <- matches Jammy (default) -NUMPY_VERSION=1.26.2 # <- for GDAL -SCIPY_VERSION='<1.12,>=1.11' # <- adjusted for numpy - -# - optional: install Mosaic -if [ $WITH_MOSAIC == 1 ] -then - pip install "databricks-mosaic$MOSAIC_PIP_VERSION" -fi +GDAL_VERSION=3.4.1 # <- ubuntugis is 3.4.3 # - optional: install GDAL if [ $WITH_GDAL == 1 ] @@ -48,36 +39,38 @@ then if [ $WITH_UBUNTUGIS == 1 ] then sudo add-apt-repository ppa:ubuntugis/ppa - GDAL_VERSION=3.4.3 # <- update gdal version + GDAL_VERSION=3.4.3 fi sudo apt-get update -y - - # - install numpy first - pip install --upgrade pip - pip install --no-cache-dir --force-reinstall numpy==$NUMPY_VERSION - pip install "scipy$SCIPY_VERSION" # - install natives - sudo apt-get install -y gdal-bin libgdal-dev python3-gdal + sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 + sudo apt-get install -y gdal-bin libgdal-dev python3-numpy python3-gdal - # - install gdal with numpy - pip install --no-cache-dir --force-reinstall GDAL[numpy]==$GDAL_VERSION + # - pip install gdal + pip install --upgrade pip + pip install gdal==$GDAL_VERSION # - add pre-build JNI shared object to the path if [ $WITH_FUSE_SO == 1 ] then - # copy from fuse dir - cp $FUSE_DIR/libgdalalljni.so /usr/lib - cp $FUSE_DIR/libgdalalljni.so.30 /usr/lib - cp $FUSE_DIR/libgdalalljni.so.30.0.3 /usr/lib + # copy from fuse dir with no-clobber + sudo cp -n $FUSE_DIR/libgdalalljni.so /usr/lib + sudo cp -n $FUSE_DIR/libgdalalljni.so.30 /usr/lib + sudo cp -n $FUSE_DIR/libgdalalljni.so.30.0.3 /usr/lib else # copy from github - # - !!! TODO: MODIFY PATH ONCE PR MERGES !!! 
- # - THIS WILL USE GITHUB_VERSION - GITHUB_REPO_PATH=databrickslabs/mosaic/main/src/main/resources/gdal/ubuntu - - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30 - #sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30.0.3 + # TODO: in v0.4.1, include $GITHUB_VERSION + GITHUB_REPO_PATH=databrickslabs/mosaic/main/resources/gdal/jammy + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30 + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30.0.3 fi fi + +# - optional: install Mosaic +if [ $WITH_MOSAIC == 1 ] +then + pip install --upgrade pip + pip install "databricks-mosaic$MOSAIC_PIP_VERSION" +fi diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala new file mode 100644 index 000000000..bd2926bcb --- /dev/null +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -0,0 +1,128 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.{IndexSystem, IndexSystemFactory} +import com.databricks.labs.mosaic.core.raster.api.GDAL +import com.databricks.labs.mosaic.core.raster.gdal.MosaicRasterGDAL +import com.databricks.labs.mosaic.core.raster.io.RasterCleaner +import com.databricks.labs.mosaic.core.types.RasterTileType +import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile +import com.databricks.labs.mosaic.datasource.gdal.ReTileOnRead +import 
com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} +import com.databricks.labs.mosaic.functions.MosaicExpressionConfig +import com.databricks.labs.mosaic.utils.PathUtils +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.{CollectionGenerator, Expression, Literal, NullIntolerant} +import org.apache.spark.sql.types.{DataType, IntegerType, StringType, StructField, StructType} +import org.apache.spark.unsafe.types.UTF8String + +import java.nio.file.{Files, Paths} + +/** + * The raster for construction of a raster tile. This should be the first + * expression in the expression tree for a raster tile. + */ +case class RST_FromContent( + rasterExpr: Expression, + driverExpr: Expression, + sizeInMB: Expression, + expressionConfig: MosaicExpressionConfig +) extends CollectionGenerator + with Serializable + with NullIntolerant + with CodegenFallback { + + override def dataType: DataType = RasterTileType(expressionConfig.getCellIdType) + + protected val geometryAPI: GeometryAPI = GeometryAPI.apply(expressionConfig.getGeometryAPI) + + protected val indexSystem: IndexSystem = IndexSystemFactory.getIndexSystem(expressionConfig.getIndexSystem) + + protected val cellIdDataType: DataType = indexSystem.getCellIdDataType + + override def position: Boolean = false + + override def inline: Boolean = false + + override def children: Seq[Expression] = Seq(rasterExpr, driverExpr, sizeInMB) + + override def elementSchema: StructType = StructType(Array(StructField("tile", dataType))) + + /** + * subdivides raster binary content into tiles of the specified size (in MB). + * @param input + * The input file path. + * @return + * The tiles. 
+ */ + override def eval(input: InternalRow): TraversableOnce[InternalRow] = { + GDAL.enable(expressionConfig) + val driver = driverExpr.eval(input).asInstanceOf[UTF8String].toString + val ext = GDAL.getExtension(driver) + var rasterArr = rasterExpr.eval(input).asInstanceOf[Array[Byte]] + val targetSize = sizeInMB.eval(input).asInstanceOf[Int] + if (targetSize <= 0 || rasterArr.length <= targetSize) { + // - no split required + var raster = MosaicRasterGDAL.readRaster(rasterArr, PathUtils.NO_PATH_STRING, driver) + var tile = MosaicRasterTile(null, raster, PathUtils.NO_PATH_STRING, driver) + val row = tile.formatCellId(indexSystem).serialize() + RasterCleaner.dispose(raster) + RasterCleaner.dispose(tile) + rasterArr = null + raster = null + tile = null + Seq(InternalRow.fromSeq(Seq(row))) + } else { + // target size is > 0 and raster size > target size + // - write the initial raster to file (unsplit) + // - createDirectories in case of context isolation + val rasterPath = PathUtils.createTmpFilePath(ext) + Files.createDirectories(Paths.get(rasterPath).getParent) + Files.write(Paths.get(rasterPath), rasterArr) + + // split to tiles up to specifed threshold + var tiles = ReTileOnRead.localSubdivide(rasterPath, PathUtils.NO_PATH_STRING, targetSize) + val rows = tiles.map(_.formatCellId(indexSystem).serialize()) + tiles.foreach(RasterCleaner.dispose(_)) + Files.deleteIfExists(Paths.get(rasterPath)) + rasterArr = null + tiles = null + rows.map(row => InternalRow.fromSeq(Seq(row))) + } + } + + override def makeCopy(newArgs: Array[AnyRef]): Expression = + GenericExpressionFactory.makeCopyImpl[RST_FromContent](this, newArgs, children.length, expressionConfig) + + override def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = makeCopy(newChildren.toArray) + +} + +/** Expression info required for the expression registration for spark SQL. 
*/ +object RST_FromContent extends WithExpressionInfo { + + override def name: String = "rst_fromcontent" + + override def usage: String = + """ + |_FUNC_(expr1, expr2, expr3) - Returns raster tiles from binary content within threshold in MBs. + |""".stripMargin + + override def example: String = + """ + | Examples: + | > SELECT _FUNC_(raster, driver, sizeInMB); + | {index_id, raster, parentPath, driver} + | ... + | """.stripMargin + + override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { + (children: Seq[Expression]) => { + val sizeExpr = if (children.length < 3) new Literal(-1, IntegerType) else children(2) + RST_FromContent(children(0), children(1), sizeExpr, expressionConfig) + } + } + +} diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala index 9cb744128..53e84d96b 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala @@ -13,7 +13,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, NullIntolerant} import org.apache.spark.unsafe.types.UTF8String -/** The expression for computing NDVI index. */ +/** The expression for map algebra. */ case class RST_MapAlgebra( rastersExpr: Expression, jsonSpecExpr: Expression, @@ -29,13 +29,13 @@ case class RST_MapAlgebra( with CodegenFallback { /** - * Computes NDVI index. + * Map Algebra. * @param tiles * The raster to be used. * @param arg1 * The red band index. * @return - * The raster contains NDVI index. + * The raster (tile) from the calculation. 
*/ override def rasterTransform(tiles: Seq[MosaicRasterTile], arg1: Any): Any = { val jsonSpec = arg1.asInstanceOf[UTF8String].toString diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 8e483c702..dc061d597 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -294,6 +294,7 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends mosaicRegistry.registerExpression[RST_Subdatasets](expressionConfig) mosaicRegistry.registerExpression[RST_Summary](expressionConfig) mosaicRegistry.registerExpression[RST_Tessellate](expressionConfig) + mosaicRegistry.registerExpression[RST_FromContent](expressionConfig) mosaicRegistry.registerExpression[RST_FromFile](expressionConfig) mosaicRegistry.registerExpression[RST_ToOverlappingTiles](expressionConfig) mosaicRegistry.registerExpression[RST_TryOpen](expressionConfig) @@ -703,7 +704,16 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, resolution.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) - def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column): Column = + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:String): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, expressionConfig)) + def 
rst_fromcontent(raster: Column, driver:String, sizeInMB:Int): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, expressionConfig)) + def rst_fromfile(raster: Column): Column = + ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, sizeInMB.expr, expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Int): Column = @@ -967,7 +977,7 @@ object MosaicContext extends Logging { val tmpDir: String = Files.createTempDirectory("mosaic").toAbsolutePath.toString - val mosaicVersion: String = "0.3.14" + val mosaicVersion: String = "0.4.0" private var instance: Option[MosaicContext] = None @@ -994,17 +1004,18 @@ object MosaicContext extends Logging { // noinspection ScalaStyle,ScalaWeakerAccess def checkDBR(spark: SparkSession): Boolean = { val sparkVersion = spark.conf.get("spark.databricks.clusterUsageTags.sparkVersion", "0") - val isML = sparkVersion.contains("-ml-") - val isPhoton = spark.conf.getOption("spark.databricks.photon.enabled").getOrElse("false").toBoolean - val isTest = !spark.conf.getAll.exists(_._1.startsWith("spark.databricks.clusterUsageTags.")) - val dbrMajor = sparkVersion.split("-").head.split("\\.").head.toInt - if ( - (dbrMajor < 13 && mosaicVersion >= "0.4.0") || - (dbrMajor > 12 && mosaicVersion < "0.4.0") - ) { + + val isML = sparkVersion.contains("-ml-") + val isPhoton = sparkVersion.contains("-photon-") + val isTest = ( + dbrMajor == 0 + && !spark.conf.getAll.exists(_._1.startsWith("spark.databricks.clusterUsageTags.")) + ) + + if (dbrMajor != 13 && !isTest) { val msg = """|DEPRECATION ERROR: - | Mosaic v0.3.x series only supports Databricks Runtime 12 and below. + | Mosaic v0.4.x series only supports Databricks Runtime 13. 
| You can specify `%pip install 'databricks-mosaic<0.4,>=0.3'` for DBR < 13.""".stripMargin logError(msg) @@ -1013,17 +1024,16 @@ object MosaicContext extends Logging { } if (!isML && !isPhoton && !isTest) { - val msg = """|DEPRECATION WARNING: + val msg = """|DEPRECATION ERROR: | Please use a Databricks: | - Photon-enabled Runtime for performance benefits | - Runtime ML for spatial AI benefits - | Mosaic will stop working on this cluster after v0.3.x.""".stripMargin - logWarning(msg) + | Mosaic 0.4.x series restricts executing this cluster.""".stripMargin + logError(msg) println(msg) - false - } else { - true - } + throw new Exception(msg) + } + true } } diff --git a/src/main/scala/com/databricks/labs/mosaic/package.scala b/src/main/scala/com/databricks/labs/mosaic/package.scala index 543592fbc..58ee2f98e 100644 --- a/src/main/scala/com/databricks/labs/mosaic/package.scala +++ b/src/main/scala/com/databricks/labs/mosaic/package.scala @@ -28,6 +28,7 @@ package object mosaic { val MOSAIC_RASTER_READ_AS_PATH = "as_path" val MOSAIC_RASTER_RE_TILE_ON_READ = "retile_on_read" + def read: MosaicDataFrameReader = new MosaicDataFrameReader(SparkSession.builder().getOrCreate()) } diff --git a/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala b/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala index f3fb9d7b9..d48c03bfd 100644 --- a/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala +++ b/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala @@ -8,8 +8,13 @@ import java.nio.file.{Files, Paths} object PathUtils { + val NO_PATH_STRING = "no_path" + def getCleanPath(path: String): String = { - val cleanPath = path.replace("file:/", "/").replace("dbfs:/", "/dbfs/") + val cleanPath = path + .replace("file:/", "/") + .replace("dbfs:/Volumes", "/Volumes") + .replace("dbfs:/","/dbfs/") if (cleanPath.endsWith(".zip") || cleanPath.contains(".zip:")) { getZipPath(cleanPath) } else { @@ -57,10 +62,12 @@ object PathUtils { result } - def 
copyToTmp(inPath: String): String = { - val cleanPath = getCleanPath(inPath) - val copyFromPath = inPath.replace("file:/", "/").replace("dbfs:/", "/dbfs/") - val driver = MosaicRasterGDAL.identifyDriver(cleanPath) + def copyToTmp(inPath: String): String = { + val copyFromPath = inPath + .replace("file:/", "/") + .replace("dbfs:/Volumes", "/Volumes") + .replace("dbfs:/","/dbfs/") + val driver = MosaicRasterGDAL.identifyDriver(getCleanPath(inPath)) val extension = if (inPath.endsWith(".zip")) "zip" else GDAL.getExtension(driver) val tmpPath = createTmpFilePath(extension) Files.copy(Paths.get(copyFromPath), Paths.get(tmpPath)) diff --git a/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala b/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala index 6ed735d1f..f37bef949 100644 --- a/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala @@ -144,7 +144,7 @@ class OGRFileFormatTest extends QueryTest with SharedSparkSessionGDAL { test("OGRFileFormat should handle partial schema: ISSUE 351") { assume(System.getProperty("os.name") == "Linux") - + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(H3, JTS) import mc.functions._ diff --git a/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala b/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala index 1f7b4008b..5e3a95bc1 100644 --- a/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala @@ -14,6 +14,7 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read netcdf with Raster As Grid Reader") { assume(System.getProperty("os.name") == "Linux") + spark.sparkContext.setLogLevel("FATAL") 
MosaicContext.build(H3IndexSystem, JTS) val netcdf = "/binary/netcdf-coral/" @@ -35,6 +36,7 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read grib with Raster As Grid Reader") { assume(System.getProperty("os.name") == "Linux") + spark.sparkContext.setLogLevel("FATAL") MosaicContext.build(H3IndexSystem, JTS) val grib = "/binary/grib-cams/" @@ -55,6 +57,7 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read tif with Raster As Grid Reader") { assume(System.getProperty("os.name") == "Linux") + spark.sparkContext.setLogLevel("FATAL") MosaicContext.build(H3IndexSystem, JTS) val tif = "/modis/" @@ -73,6 +76,7 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read zarr with Raster As Grid Reader") { assume(System.getProperty("os.name") == "Linux") + spark.sparkContext.setLogLevel("FATAL") MosaicContext.build(H3IndexSystem, JTS) val zarr = "/binary/zarr-example/" diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala index 611be6f31..f9fbd8c4b 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala @@ -13,6 +13,7 @@ import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper trait ST_HaversineBehaviors extends QueryTest { def haversineBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala index 4741eedff..3ff0d38b0 
100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala @@ -15,8 +15,8 @@ trait ST_RotateBehaviors extends QueryTest { def rotateBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - val sc = spark val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ import sc.implicits._ mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala index 4901a0f04..8cd3962ba 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala @@ -14,8 +14,8 @@ trait ST_ScaleBehaviors extends QueryTest { def scaleBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - val sc = spark val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ import sc.implicits._ mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala index 61fd4de70..062c5bcc9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala @@ -13,8 +13,8 @@ trait ST_SetSRIDBehaviors extends QueryTest { def setSRIDBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - val sc = spark val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ import 
sc.implicits._ mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala index a5e10e13d..672ca0efa 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala @@ -17,8 +17,8 @@ trait ST_SimplifyBehaviors extends QueryTest { def simplifyBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) - import mc.functions._ val sc = spark + import mc.functions._ import sc.implicits._ mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala index 1f2e8b1a5..6c11580ec 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala @@ -20,8 +20,9 @@ trait ST_TransformBehaviors extends QueryTest { val geomFactory = new GeometryFactory() def reprojectGeometries(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { - val sc = spark + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ import sc.implicits._ mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala index 58c7215db..39eb08ef6 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala @@ -14,8 +14,8 @@ trait ST_TranslateBehaviors extends QueryTest { def translateBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - val sc = spark val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ import sc.implicits._ mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala index 435677c64..3c51530e9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala @@ -15,9 +15,10 @@ import org.scalatest.matchers.should.Matchers.{an, be, convertToAnyShouldWrapper trait ST_UnionBehaviors extends QueryTest { def unionBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ - val sc = spark import sc.implicits._ mc.register(spark) @@ -39,9 +40,10 @@ trait ST_UnionBehaviors extends QueryTest { } def unionAggBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) - import mc.functions._ val sc = spark + import mc.functions._ import sc.implicits._ mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala index 6051ccc8e..6d570c757 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala @@ -11,6 +11,7 @@ trait RST_BandMetadataBehaviors extends QueryTest { def bandMetadataBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { val sc = spark + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala index 9478411bd..e8c7cc214 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala @@ -10,6 +10,7 @@ trait RST_BoundingBoxBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala index dbc0b35e9..397f02a95 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala @@ -10,6 +10,7 @@ trait RST_ClipBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala 
index 5ed81f8f1..7756d0ff8 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala @@ -10,6 +10,7 @@ trait RST_CombineAvgAggBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala index b0f1225d2..8ce57f5b8 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala @@ -11,6 +11,7 @@ trait RST_CombineAvgBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala index c3668bd83..03d82b955 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala @@ -11,6 +11,7 @@ trait RST_DerivedBandAggBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff 
--git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala index bd2ded02a..ef6466a88 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala @@ -11,6 +11,7 @@ trait RST_DerivedBandBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark @@ -20,6 +21,7 @@ trait RST_DerivedBandBehaviors extends QueryTest { val rastersInMemory = spark.read .format("gdal") .option("raster_storage", "in-memory") + .option("pathGlobFilter", "*_B01.TIF") .load("src/test/resources/modis") val funcName = "multiply" diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala index 3a7f7f4a2..24e5897f7 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala @@ -11,6 +11,7 @@ trait RST_FromBandsBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala new file mode 100644 index 000000000..5f269044f --- /dev/null +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala @@ -0,0 +1,72 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.functions.MosaicContext +import org.apache.spark.sql.QueryTest +import org.scalatest.matchers.should.Matchers._ + +trait RST_FromContentBehaviors extends QueryTest { + + // noinspection MapGetGet + def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("ERROR") + val mc = MosaicContext.build(indexSystem, geometryAPI) + mc.register() + val sc = spark + + import mc.functions._ + import sc.implicits._ + import org.apache.spark.sql.functions._ + + val rastersInMemory = spark.read + .format("binaryFile") + .load("src/test/resources/modis") + + val gridTiles = rastersInMemory + .withColumn("tile", rst_fromcontent($"content", "GTiff")) + .withColumn("bbox", rst_boundingbox($"tile")) + .withColumn("cent", st_centroid($"bbox")) + .withColumn("clip_region", st_buffer($"cent", 0.1)) + .withColumn("clip", rst_clip($"tile", $"clip_region")) + .withColumn("bbox2", rst_boundingbox($"clip")) + .withColumn("result", st_area($"bbox") =!= st_area($"bbox2")) + .select("result") + .as[Boolean] + .collect() + + gridTiles.forall(identity) should be(true) + + rastersInMemory.createOrReplaceTempView("source") + + val gridTilesSQL = spark + .sql(""" + |with subquery as ( + | select rst_fromcontent(content, 'GTiff') as tile from source + |) + |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result + |from subquery + |""".stripMargin) + .as[Boolean] + .collect() + + gridTilesSQL.forall(identity) should be(true) + + + val gridTilesSQL2 = spark + .sql( + """ + |with subquery as ( + | select rst_fromcontent(content, 'GTiff', 4) as tile 
from source + |) + |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result + |from subquery + |""".stripMargin) + .as[Boolean] + .collect() + + gridTilesSQL2.forall(identity) should be(true) + + } + +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentTest.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentTest.scala new file mode 100644 index 000000000..01b350973 --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentTest.scala @@ -0,0 +1,31 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.JTS +import com.databricks.labs.mosaic.core.index.H3IndexSystem +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSessionGDAL + +import scala.util.Try + +class RST_FromContentTest extends QueryTest with SharedSparkSessionGDAL with RST_FromContentBehaviors { + + private val noCodegen = + withSQLConf( + SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString + ) _ + + // Hotfix for SharedSparkSession afterAll cleanup. + override def afterAll(): Unit = Try(super.afterAll()) + + // These tests are not index system nor geometry API specific. + // Only testing one pairing is sufficient. 
+ test("Testing RST_FromContent with manual GDAL registration (H3, JTS).") { + noCodegen { + assume(System.getProperty("os.name") == "Linux") + behaviors(H3IndexSystem, JTS) + } + } +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala index f61fe174d..625123bad 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala @@ -10,6 +10,7 @@ trait RST_FromFileBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala index 6e698426d..e5ab3c159 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala @@ -11,6 +11,7 @@ trait RST_GeoReferenceBehaviors extends QueryTest { //noinspection MapGetGet def geoReferenceBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala index b1154f55e..69bf7f0f9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala @@ -10,6 +10,7 @@ trait RST_GetNoDataBehaviors extends QueryTest { //noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala index cc572e475..99307a158 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala @@ -11,6 +11,7 @@ trait RST_GetSubdatasetBehaviors extends QueryTest { //noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala index 7effc2e14..a39c02ca3 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_HeightBehaviors extends QueryTest { def heightBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala index cb00638e1..1af81d15d 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala @@ -10,6 +10,7 @@ trait RST_InitNoDataBehaviors extends QueryTest { //noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala index 0db36ec39..6ce9771e9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala @@ -10,6 +10,7 @@ trait RST_IsEmptyBehaviors extends QueryTest { // noinspection AccessorLikeMethodIsUnit def isEmptyBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala index fd15f8102..e16e41d6a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala @@ -11,6 +11,7 @@ trait RST_MapAlgebraBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("ERROR") val mc = 
MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark @@ -20,6 +21,7 @@ trait RST_MapAlgebraBehaviors extends QueryTest { val rastersInMemory = spark.read .format("gdal") .option("raster_storage", "in-memory") + .option("pathGlobFilter", "*_B01.TIF") .load("src/test/resources/modis") val gridTiles = rastersInMemory diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala index 741fad613..316482c1c 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_MemSizeBehaviors extends QueryTest { def memSizeBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala index 0533eafee..8698b46af 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala @@ -10,6 +10,7 @@ trait RST_MergeAggBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark @@ -19,6 +20,7 @@ trait RST_MergeAggBehaviors extends QueryTest { val rastersInMemory = spark.read .format("gdal") .option("raster_storage", "in-memory") + .option("pathGlobFilter", "*_B01.TIF") 
.load("src/test/resources/modis") val gridTiles = rastersInMemory diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala index f4b17ce83..893d6bdf4 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala @@ -11,6 +11,7 @@ trait RST_MergeBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark @@ -20,6 +21,7 @@ trait RST_MergeBehaviors extends QueryTest { val rastersInMemory = spark.read .format("gdal") .option("raster_storage", "in-memory") + .option("pathGlobFilter", "*_B01.TIF") .load("src/test/resources/modis") val gridTiles = rastersInMemory diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala index d6869fce7..c335ee0e9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_MetadataBehaviors extends QueryTest { def metadataBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala index b433ccd79..5e7915b46 100644 --- 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala @@ -11,6 +11,7 @@ trait RST_NDVIBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("ERROR") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala index 711cab7ce..49800ed40 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_NumBandsBehaviors extends QueryTest { def numBandsBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala index d9f0c66f1..0d6565300 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_PixelHeightBehaviors extends QueryTest { def pixelHeightBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala index 895c12a52..346172bfb 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_PixelWidthBehaviors extends QueryTest { def pixelWidthBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala index 2a08fe559..d41882506 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala @@ -10,6 +10,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToGridAvgBehaviors extends QueryTest { def rasterToGridAvgBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala index 2d1eca342..5d10766f3 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala @@ -10,6 +10,7 @@ import 
org.scalatest.matchers.should.Matchers._ trait RST_RasterToGridCountBehaviors extends QueryTest { def rasterToGridCountBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala index f150abdf9..df5d8ee5e 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala @@ -10,6 +10,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToGridMaxBehaviors extends QueryTest { def rasterToGridMaxBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala index 49ca59dd3..ade27ed78 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala @@ -10,6 +10,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToGridMedianBehaviors extends QueryTest { def rasterToGridMedianBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala index 134f0bfa4..38ab49db1 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala @@ -10,6 +10,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToGridMinBehaviors extends QueryTest { def rasterToGridMinBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala index 8265e745a..cbd00572c 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala @@ -10,6 +10,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToWorldCoordBehaviors extends QueryTest { def rasterToWorldCoordBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala index 079e0839b..2fab5d5ec 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala @@ -10,6 +10,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToWorldCoordXBehaviors extends QueryTest { def rasterToWorldCoordX(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala index c27722e8b..9da7b5ec9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala @@ -10,6 +10,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToWorldCoordYBehaviors extends QueryTest { def rasterToWorldCoordY(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala index 608c3de85..118a64e2a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala @@ -10,6 +10,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_ReTileBehaviors extends QueryTest { def retileBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala index 6469d7292..9bdbb7ef1 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RotationBehaviors extends QueryTest { def rotationBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala index debe3d0a1..31fde84c4 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_SRIDBehaviors extends QueryTest { def sridBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala index e12dca7fe..3543fb469 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_ScaleXBehaviors extends QueryTest { def scaleXBehavior(indexSystem: 
IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala index e264199b1..1251f660a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_ScaleYBehaviors extends QueryTest { def scaleYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala index c28403817..2d35388fd 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala @@ -11,6 +11,7 @@ trait RST_SetNoDataBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala index 2a5b5e3db..f7e745613 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_SkewXBehaviors extends QueryTest { def skewXBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala index 294157065..ef444858f 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_SkewYBehaviors extends QueryTest { def skewYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala index ad713f17c..c8f7d435b 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_SubdatasetsBehaviors extends QueryTest { def subdatasetsBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala index 1d53cdb4a..610e7c657 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_SummaryBehaviors extends QueryTest { def summaryBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala index daad95af0..c346e82db 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala @@ -10,6 +10,7 @@ trait RST_TessellateBehaviors extends QueryTest { // noinspection MapGetGet def tessellateBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala index d51f26891..560e54dee 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala @@ -11,6 +11,7 @@ trait RST_ToOverlappingTilesBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + 
spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala index 3e5669614..257d1cfc3 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala @@ -10,6 +10,7 @@ trait RST_TryOpenBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala index 88e5ecd3a..e997a5c87 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_UpperLeftXBehaviors extends QueryTest { def upperLeftXBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala index fc83d11d2..d98824f3d 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_UpperLeftYBehaviors extends QueryTest { def upperLeftYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala index 885a3e05a..f3fd3c416 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala @@ -9,6 +9,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_WidthBehaviors extends QueryTest { def widthBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala index 4aaf86b3e..baef5fa90 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala @@ -10,6 +10,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_WorldToRasterCoordBehaviors extends QueryTest { def worldToRasterCoordBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala index 9dc26422a..f9fd82dff 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala @@ -10,6 +10,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_WorldToRasterCoordXBehaviors extends QueryTest { def worldToRasterCoordXBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala index e2a259b55..bffba09c8 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala @@ -10,6 +10,7 @@ import org.scalatest.matchers.should.Matchers._ trait RST_WorldToRasterCoordYBehaviors extends QueryTest { def worldToRasterCoordYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala index 35bf435ba..a4afc3a1a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala @@ -1,8 +1,8 
@@ package com.databricks.labs.mosaic.functions +import com.databricks.labs.mosaic._ import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.test._ -import com.databricks.labs.mosaic._ import org.apache.spark.sql.adapters.Column import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder @@ -248,17 +248,29 @@ trait MosaicContextBehaviors extends MosaicSpatialQueryTest { method.apply(1).asInstanceOf[Int] shouldBe 2 } - def printWarnings(): Unit = { - spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "1-x") - spark.conf.set("spark.databricks.photon.enabled", "false") - spark.conf.set("spark.databricks.clusterUsageTags.clusterType", "1-x") - noException should be thrownBy MosaicContext.checkDBR(spark) - } + def throwErrors(): Unit = { + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-x") + an[Exception] should be thrownBy MosaicContext.checkDBR(spark) - def throwError(): Unit = { spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "14-x") - spark.conf.set("spark.databricks.clusterUsageTags.clusterType", "14-x") an[Exception] should be thrownBy MosaicContext.checkDBR(spark) + + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "14-photon-x") + an[Exception] should be thrownBy MosaicContext.checkDBR(spark) + + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "12-x") + an[Exception] should be thrownBy MosaicContext.checkDBR(spark) + + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "12-photon-x") + an[Exception] should be thrownBy MosaicContext.checkDBR(spark) + } + + def noErrors(): Unit = { + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-ml-x") + noException should be thrownBy MosaicContext.checkDBR(spark) + + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-photon-x") + noException should be thrownBy MosaicContext.checkDBR(spark) } 
} diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala index a06a43bcd..c6e773244 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala @@ -17,7 +17,6 @@ class MosaicContextTest extends MosaicSpatialQueryTest with SharedSparkSession w test("MosaicContext lookup correct sql functions") { sqlFunctionLookup() } test("MosaicContext should use databricks h3") { callDatabricksH3() } test("MosaicContext should correctly reflect functions") { reflectedMethods() } - test("MosaicContext should printWarning") { printWarnings() } - test("MosaicContext should throwError") { throwError() } - + test("MosaicContext should throw error for wrong DBRs") { throwErrors() } + test("MosaicContext should not throw error for correct DBRs") { noErrors() } } diff --git a/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala b/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala index f9d133b7e..e3096ba84 100644 --- a/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala @@ -21,6 +21,7 @@ class SpatialKNNTest extends AnyFlatSpec with SpatialKNNBehaviors with SparkSuit .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") .set("spark.sql.parquet.compression.codec", "uncompressed") var spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like noApproximation(MosaicContext.build(H3IndexSystem, JTS), spark) conf = new SparkConf(false) @@ -31,6 +32,7 @@ class SpatialKNNTest extends AnyFlatSpec with SpatialKNNBehaviors with SparkSuit .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") .set("spark.sql.parquet.compression.codec", 
"uncompressed") spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like noApproximation(MosaicContext.build(BNGIndexSystem, JTS), spark) } diff --git a/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala b/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala index db100a53d..55bdcf1e2 100644 --- a/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala +++ b/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala @@ -18,6 +18,7 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa .set(MOSAIC_RASTER_API, "GDAL") .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQL") var spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like sqlRegister(MosaicContext.build(H3IndexSystem, JTS), spark) conf = new SparkConf(false) @@ -26,6 +27,7 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa .set(MOSAIC_RASTER_API, "GDAL") .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQL") spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like sqlRegister(MosaicContext.build(BNGIndexSystem, JTS), spark) conf = new SparkConf(false) @@ -34,6 +36,7 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa .set(MOSAIC_RASTER_API, "GDAL") .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQL") spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like { an[Error] should be thrownBy spark.sql("""show functions""").collect() } @@ -41,6 +44,7 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa conf = new SparkConf(false) .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQLDefault") spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") 
it should behave like sqlRegister(MosaicContext.build(H3IndexSystem, JTS), spark) } @@ -49,9 +53,10 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa assume(System.getProperty("os.name") == "Linux") val conf = new SparkConf(loadDefaults = false) - .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicGDAL") .set(MOSAIC_GDAL_NATIVE, "true") + .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicGDAL") val spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like mosaicGDAL(MosaicContext.build(H3IndexSystem, JTS), spark) } diff --git a/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala b/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala index 6e40e5350..2346ad05f 100644 --- a/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala +++ b/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala @@ -2,7 +2,6 @@ package com.databricks.labs.mosaic.test import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, TestSuite} - import org.apache.spark.sql._ trait SparkSuite extends TestSuite with BeforeAndAfterAll { diff --git a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala index 36da49694..a720b3f9b 100644 --- a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala +++ b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala @@ -1,7 +1,7 @@ package org.apache.spark.sql.test +import com.databricks.labs.mosaic._ import com.databricks.labs.mosaic.gdal.MosaicGDAL -import com.databricks.labs.mosaic.{MOSAIC_GDAL_NATIVE, MOSAIC_RASTER_CHECKPOINT} import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.gdal.gdal.gdal @@ -23,10 +23,6 @@ trait SharedSparkSessionGDAL extends SharedSparkSession { val session = new TestSparkSession(conf) 
session.sparkContext.setLogLevel("FATAL") Try { - val tempPath = Files.createTempDirectory("mosaic-gdal") - // prepareEnvironment no longer exists - // - only have python calls now - //MosaicGDAL.prepareEnvironment(session, tempPath.toAbsolutePath.toString) MosaicGDAL.enableGDAL(session) } session @@ -37,5 +33,5 @@ trait SharedSparkSessionGDAL extends SharedSparkSession { MosaicGDAL.enableGDAL(this.spark) gdal.AllRegister() } - + }