Skip to content

Commit

Permalink
Merge branch 'databrickslabs:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
a0x8o authored Nov 15, 2023
2 parents fc4e2b2 + 3c5ab2e commit 25606ed
Show file tree
Hide file tree
Showing 269 changed files with 11,851 additions and 3,056 deletions.
2 changes: 1 addition & 1 deletion .github/actions/python_build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ runs:
shell: bash
run: |
cd python
pip install build wheel pyspark==${{ matrix.spark }}
pip install build wheel pyspark==${{ matrix.spark }} numpy==${{ matrix.numpy }}
pip install .
- name: Test and build python package
shell: bash
Expand Down
47 changes: 39 additions & 8 deletions .github/actions/r_build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,54 @@ runs:
- name: Setup R build environment
shell: bash
run: |
sudo apt-get update && sudo apt-get install -y curl libcurl4-openssl-dev pkg-config libharfbuzz-dev libfribidi-dev
- name: Download and unpack Spark
sudo apt-get update && sudo apt-get install -y curl libcurl4-openssl-dev pkg-config libharfbuzz-dev libfribidi-dev
- name: Create download location for Spark
shell: bash
run: |
sudo mkdir -p /usr/spark-download/raw
sudo mkdir -p /usr/spark-download/unzipped
sudo mkdir -p /usr/spark-download/raw
sudo chown -R $USER: /usr/spark-download/
wget -P /usr/spark-download/raw https://archive.apache.org/dist/spark/spark-3.2.1/spark-3.2.1-bin-hadoop2.7.tgz
- name: Cache Spark download
id: cache-spark
uses: actions/cache@v3
with:
path: /usr/spark-download/unzipped
key: r_build-spark
- if: ${{ steps.cache-spark.outputs.cache-hit != 'true' }}
name: Download and unpack Spark
shell: bash
run: |
wget -P /usr/spark-download/raw https://archive.apache.org/dist/spark/spark-3.2.1/spark-3.2.1-bin-hadoop2.7.tgz
tar zxvf /usr/spark-download/raw/spark-3.2.1-bin-hadoop2.7.tgz -C /usr/spark-download/unzipped
- name: Build R package
- name: Create R environment
shell: bash
run: |
cd R
sudo mkdir -p /usr/lib/R/site-library
sudo chown -R $USER: /usr/lib/R/site-library
- name: Setup R
uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.R }}
use-public-rspm: true
- name: Install R dependencies
shell: bash
run: |
cd R
Rscript --vanilla install_deps.R
- name: Generate R bindings
shell: bash
run: |
cd R
Rscript --vanilla generate_R_bindings.R ../src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala
- name: Build R docs
shell: bash
run: |
cd R
Rscript --vanilla generate_docs.R
- name: Build R package
shell: bash
run: |
cd R
Rscript --vanilla build_r_package.R
- name: Test R package
shell: bash
Expand Down
14 changes: 13 additions & 1 deletion .github/actions/scala_build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,27 @@ runs:
with:
java-version: '8'
distribution: 'zulu'
- name: Configure python interpreter
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Add packaged GDAL dependencies
shell: bash
run : |
sudo apt-get update && sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7
pip install databricks-mosaic-gdal==${{ matrix.gdal }}
sudo tar -xf /opt/hostedtoolcache/Python/${{ matrix.python }}/x64/lib/python3.9/site-packages/databricks-mosaic-gdal/resources/gdal-${{ matrix.gdal }}-filetree.tar.xz -C /
sudo tar -xhf /opt/hostedtoolcache/Python/${{ matrix.python }}/x64/lib/python3.9/site-packages/databricks-mosaic-gdal/resources/gdal-${{ matrix.gdal }}-symlinks.tar.xz -C /
- name: Test and build the scala JAR - skip tests is false
if: inputs.skip_tests == 'false'
shell: bash
run: sudo mvn -q clean install
- name: Build the scala JAR - skip tests is true
if: inputs.skip_tests == 'true'
shell: bash
run: sudo mvn -q clean install -DskipTests
run: sudo mvn -q clean install -DskipTests -Dscoverage.skip
- name: Publish test coverage
if: inputs.skip_tests == 'false'
uses: codecov/codecov-action@v1
- name: Copy Scala artifacts to GH Actions run
shell: bash
Expand Down
16 changes: 10 additions & 6 deletions .github/workflows/build_main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ name: build main
on:
push:
branches-ignore:
- "R/*"
- "r/*"
- "python/*"
- "scala/*"
- "R/**"
- "r/**"
- "python/**"
- "scala/**"
pull_request:
branches:
- "**"
Expand All @@ -16,8 +16,10 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.8.10 ]
spark: [ 3.2.1 ]
python: [ 3.9.5 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
steps:
- name: checkout code
Expand All @@ -26,5 +28,7 @@ jobs:
uses: ./.github/actions/scala_build
- name: build python
uses: ./.github/actions/python_build
- name: build R
uses: ./.github/actions/r_build
- name: upload artefacts
uses: ./.github/actions/upload_artefacts
8 changes: 5 additions & 3 deletions .github/workflows/build_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: build_python
on:
push:
branches:
- "python/*"
- "python/**"

jobs:
build:
Expand All @@ -12,8 +12,10 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.8.10 ]
spark: [ 3.2.1 ]
python: [ 3.9.5 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
steps:
- name: checkout code
Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/build_r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ name: build_R
on:
push:
branches:
- 'r/*'
- 'R/*'
- 'r/**'
- 'R/**'

jobs:
build:
Expand All @@ -13,8 +13,10 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.8.10 ]
spark: [ 3.2.1 ]
python: [ 3.9.5 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
steps:
- name: checkout code
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/build_scala.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: build_scala
on:
push:
branches:
- "scala/"
- "scala/**"

jobs:
build:
Expand All @@ -11,8 +11,10 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
strategy:
matrix:
python: [ 3.8.10 ]
spark: [ 3.2.1 ]
python: [ 3.9.5 ]
numpy: [ 1.21.5 ]
gdal: [ 3.4.3 ]
spark: [ 3.3.2 ]
R: [ 4.1.2 ]
steps:
- name: checkout code
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pypi-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
python: [3.8.10]
python: [3.9]
spark: [3.2.1]
steps:
- name: checkout code
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#IntelliJ files
.idea
*.iml
tmp_

#VSCode files
.vscode
Expand Down Expand Up @@ -65,6 +66,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
/python/test/.run/
spatial_knn

# Translations
*.mo
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
## v0.3.12
- Make JTS default Geometry Provider
- Add raster tile functions.
- Expand the support for raster manipulation.
- Add abstractions for running distributed gdal_translate, gdalwarp, gdalcalc, etc.
- Add RST_BoundingBox, RST_Clip, RST_CombineAvg, RST_CombineAvgAgg, RST_FromBands, RST_FromFile, RST_GetNoData,
RST_InitNoData, RST_Merge, RST_MergeAgg, RST_NDVI, RST_ReTile, RST_SetNoData, RST_Subdivide
- Add RST_Tessellate that generates H3 tiles from rasters.
- Add RST_ToOverlappingTiles that generates tiles with overlapping areas from rasters.
- Add GDAL generic format reader.

## v0.3.11
- Update the CONTRIBUTING.md to follow the standard process.
Expand Down
1 change: 1 addition & 0 deletions R/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
**/.Rhistory
**/*.tar.gz
39 changes: 0 additions & 39 deletions R/build_r_package.R
Original file line number Diff line number Diff line change
@@ -1,54 +1,15 @@
repos = c(
"https://cran.ma.imperial.ac.uk" = "https://cran.ma.imperial.ac.uk"
,"https://www.stats.bris.ac.uk/R" = "https://www.stats.bris.ac.uk/R"
,"https://cran.rstudio.com/" = "https://cran.rstudio.com/"
)

mirror_is_up <- function(x){
out <- tryCatch({
available.packages(contrib.url(x))
}
,error = function(cond){return(0)}
,warning = function(cond){return(0)}
,finally = function(cond){}
)
return(length(out))
}

mirror_status = lapply(repos, mirror_is_up)
for(repo in names(mirror_status)){
if (mirror_status[[repo]] > 1){
repo <<- repo
break
}
}

install.packages("pkgbuild", repos=repo)
install.packages("roxygen2", repos=repo)
install.packages("sparklyr", repos=repo)
spark_location <- "/usr/spark-download/unzipped/spark-3.2.1-bin-hadoop2.7"
Sys.setenv(SPARK_HOME = spark_location)

library(SparkR, lib.loc = c(file.path(spark_location, "R", "lib")))


library(pkgbuild)
library(roxygen2)
library(sparklyr)



build_mosaic_bindings <- function(){
# build functions
scala_file_path <- "../src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala"
system_cmd <- paste0(c("Rscript --vanilla generate_R_bindings.R", scala_file_path), collapse = " ")
system(system_cmd)

# build doc
roxygen2::roxygenize("sparkR-mosaic/sparkrMosaic")
roxygen2::roxygenize("sparklyr-mosaic/sparklyrMosaic")


## build package
pkgbuild::build("sparkR-mosaic/sparkrMosaic")
pkgbuild::build("sparklyr-mosaic/sparklyrMosaic")
Expand Down
14 changes: 14 additions & 0 deletions R/generate_docs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Generate roxygen2 documentation for both Mosaic R binding packages
# (the SparkR bindings and the sparklyr bindings). SparkR is loaded
# from a local Spark distribution, so SPARK_HOME must point at it
# before the packages are roxygenized.
spark_location <- "/usr/spark-download/unzipped/spark-3.2.1-bin-hadoop2.7"
Sys.setenv(SPARK_HOME = spark_location)

library(SparkR, lib.loc = c(file.path(spark_location, "R", "lib")))
library(roxygen2)

build_mosaic_docs <- function() {
  # Regenerate the .Rd documentation for each binding package in turn.
  package_dirs <- c(
    "sparkR-mosaic/sparkrMosaic",
    "sparklyr-mosaic/sparklyrMosaic"
  )
  for (package_dir in package_dirs) {
    roxygen2::roxygenize(package_dir)
  }
}

build_mosaic_docs()
5 changes: 5 additions & 0 deletions R/install_deps.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Install the R packages needed to document, build and test the Mosaic
# R bindings. Packages are fetched from the Posit Package Manager
# binary mirror (Ubuntu focal) for fast, reproducible CI installs.
options(repos = c(CRAN = "https://packagemanager.posit.co/cran/__linux__/focal/latest"))

build_deps <- c("pkgbuild", "roxygen2", "sparklyr")
for (dep in build_deps) {
  install.packages(dep)
}
5 changes: 1 addition & 4 deletions R/sparkR-mosaic/enableMosaic.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,11 @@
enableMosaic <- function(
geometryAPI="JTS"
,indexSystem="H3"
,rasterAPI="GDAL"
){
geometry_api <- sparkR.callJStatic(x="com.databricks.labs.mosaic.core.geometry.api.GeometryAPI", methodName="apply", geometryAPI)
indexing_system <- sparkR.callJStatic(x="com.databricks.labs.mosaic.core.index.IndexSystemFactory", methodName="getIndexSystem", indexSystem)

raster_api <- sparkR.callJStatic(x="com.databricks.labs.mosaic.core.raster.api.RasterAPI", methodName="apply", rasterAPI)

mosaic_context <- sparkR.newJObject(x="com.databricks.labs.mosaic.functions.MosaicContext", indexing_system, geometry_api, raster_api)
mosaic_context <- sparkR.newJObject(x="com.databricks.labs.mosaic.functions.MosaicContext", indexing_system, geometry_api)
functions <<- sparkR.callJMethod(mosaic_context, "functions")
# register the sql functions for use in sql() commands
sparkR.callJMethod(mosaic_context, "register")
Expand Down
4 changes: 2 additions & 2 deletions R/sparkR-mosaic/sparkrMosaic/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Package: sparkrMosaic
Title: SparkR bindings for Databricks Mosaic
Version: 0.3.11
Version: 0.3.12
Authors@R:
person("Robert", "Whiffin", , "[email protected]", role = c("aut", "cre")
)
Description: This package extends SparkR to bring the Databricks Mosaic for geospatial processing APIs into SparkR.
License: Databricks
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.0
RoxygenNote: 7.2.3
Collate:
'enableMosaic.R'
'generics.R'
Expand Down
6 changes: 2 additions & 4 deletions R/sparkR-mosaic/tests.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
repo<-"https://cran.ma.imperial.ac.uk/"

spark_location <- "/usr/spark-download/unzipped/spark-3.2.1-bin-hadoop2.7"
Sys.setenv(SPARK_HOME = spark_location)
library(SparkR, lib.loc = c(file.path(spark_location, "R", "lib")))
Expand Down Expand Up @@ -66,15 +64,15 @@ sdf <- withColumn(sdf, "transformed_geom", st_transform(column("geom_with_srid")
# Grid functions
sdf <- withColumn(sdf, "grid_longlatascellid", grid_longlatascellid(lit(1), lit(1), lit(1L)))
sdf <- withColumn(sdf, "grid_pointascellid", grid_pointascellid(column("point_wkt"), lit(1L)))
sdf <- withColumn(sdf, "grid_boundaryaswkb", grid_boundaryaswkb( SparkR::cast(lit(1), "long")))
sdf <- withColumn(sdf, "grid_boundaryaswkb", grid_boundaryaswkb(column("grid_pointascellid")))
sdf <- withColumn(sdf, "grid_polyfill", grid_polyfill(column("wkt"), lit(1L)))
sdf <- withColumn(sdf, "grid_tessellateexplode", grid_tessellateexplode(column("wkt"), lit(1L)))
sdf <- withColumn(sdf, "grid_tessellate", grid_tessellate(column("wkt"), lit(1L)))

# Deprecated
sdf <- withColumn(sdf, "point_index_lonlat", point_index_lonlat(lit(1), lit(1), lit(1L)))
sdf <- withColumn(sdf, "point_index_geom", point_index_geom(column("point_wkt"), lit(1L)))
sdf <- withColumn(sdf, "index_geometry", index_geometry( SparkR::cast(lit(1), "long")))
sdf <- withColumn(sdf, "index_geometry", index_geometry(column("point_index_geom")))
sdf <- withColumn(sdf, "polyfill", polyfill(column("wkt"), lit(1L)))
sdf <- withColumn(sdf, "mosaic_explode", mosaic_explode(column("wkt"), lit(1L)))
sdf <- withColumn(sdf, "mosaicfill", mosaicfill(column("wkt"), lit(1L)))
Expand Down
4 changes: 2 additions & 2 deletions R/sparklyr-mosaic/sparklyrMosaic/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Package: sparklyrMosaic
Title: sparklyr bindings for Databricks Mosaic
Version: 0.3.11
Version: 0.3.12
Authors@R:
person("Robert", "Whiffin", , "[email protected]", role = c("aut", "cre")
)
Description: This package extends sparklyr to bring the Databricks Mosaic for geospatial processing APIs into sparklyr .
License: Databricks
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.0
RoxygenNote: 7.2.3
Collate:
'enableMosaic.R'
'sparkFunctions.R'
Expand Down
Loading

0 comments on commit 25606ed

Please sign in to comment.