From 22c7b8ec5f505454e37163d0bc86b97041b74675 Mon Sep 17 00:00:00 2001
From: Rob Martinsen
Date: Sat, 23 Nov 2024 08:34:54 -0500
Subject: [PATCH] Add timescale and pg_stat_statements postgres extensions

---
 data/Dockerfile-timescale  | 31 +++++++++++++++++++++++++++++++
 data/docker-compose.yml    | 23 +++++++++++++++++++++--
 data/init_pg_timescale.sql |  5 +++++
 docs/SETUP/BACK_END.md     | 16 ++++++++++++++++
 4 files changed, 73 insertions(+), 2 deletions(-)
 create mode 100644 data/Dockerfile-timescale
 create mode 100644 data/init_pg_timescale.sql

diff --git a/data/Dockerfile-timescale b/data/Dockerfile-timescale
new file mode 100644
index 00000000..8de20bb8
--- /dev/null
+++ b/data/Dockerfile-timescale
@@ -0,0 +1,31 @@
+FROM postgres:16-bullseye
+
+ENV POSTGIS_MAJOR=3
+ENV POSTGIS_VERSION=3.5.0+dfsg-1.pgdg110+1
+ENV TIMESCALE_MAJOR=2
+ENV TIMESCALE_MINOR=17
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends lsb-release curl gnupg apt-transport-https wget \
+    # ca-certificates: for accessing remote raster files;
+    # fix: https://github.com/postgis/docker-postgis/issues/307
+    ca-certificates \
+    postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR=$POSTGIS_VERSION \
+    postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR-scripts \
+    # Add timescale repository and key
+    && echo "deb https://packagecloud.io/timescale/timescaledb/debian/ $(lsb_release -c -s) main" \
+    | tee /etc/apt/sources.list.d/timescaledb.list \
+    && wget --quiet -O - https://packagecloud.io/timescale/timescaledb/gpgkey \
+    | gpg --dearmor -o /etc/apt/trusted.gpg.d/timescaledb.gpg \
+    # Install timescaledb
+    && apt-get update \
+    && apt-get install -y --no-install-recommends timescaledb-$TIMESCALE_MAJOR-postgresql-$PG_MAJOR="$TIMESCALE_MAJOR.$TIMESCALE_MINOR*" postgresql-client-$PG_MAJOR \
+    # Remove temporary files; drop the repo list together with its key so a later `apt-get update` does not fail
+    && rm -rf /var/lib/apt/lists/* \
+    && rm -f /etc/apt/trusted.gpg.d/timescaledb.gpg /etc/apt/sources.list.d/timescaledb.list
+
+
+RUN mkdir -p /docker-entrypoint-initdb.d
+
+# Preload timescaledb and pg_stat_statements; both must be in shared_preload_libraries to be usable
+RUN echo "shared_preload_libraries='timescaledb,pg_stat_statements'" >> /usr/share/postgresql/postgresql.conf.sample
diff --git a/data/docker-compose.yml b/data/docker-compose.yml
index 5d797d9e..455d0ed7 100644
--- a/data/docker-compose.yml
+++ b/data/docker-compose.yml
@@ -62,8 +62,8 @@ services:
       context: .
       dockerfile: Dockerfile-pg
     environment:
-      - PGPORT=5433
-      - POSTGRES_PASSWORD
+      PGPORT: 5433
+      POSTGRES_PASSWORD:
     restart: always
     ports:
       - '5433:5433'
@@ -75,5 +75,24 @@ services:
     extra_hosts:
       - host.docker.internal:host-gateway
 
+  postgres-timescale:
+    container_name: cagp-postgres-timescale
+    build:
+      context: .
+      dockerfile: Dockerfile-timescale
+    environment:
+      PGPORT: 5434
+      POSTGRES_PASSWORD:
+    restart: always
+    ports:
+      - '5434:5434'
+    volumes:
+      - timescale_database_volume:/var/lib/postgresql/data
+      - ./init_pg_timescale.sql:/docker-entrypoint-initdb.d/init_pg.sql
+      - /etc/timezone:/etc/timezone:ro
+      - /etc/localtime:/etc/localtime:ro
+    extra_hosts:
+      - host.docker.internal:host-gateway
 volumes:
   database_volume:
+  timescale_database_volume:
diff --git a/data/init_pg_timescale.sql b/data/init_pg_timescale.sql
new file mode 100644
index 00000000..ab66f3af
--- /dev/null
+++ b/data/init_pg_timescale.sql
@@ -0,0 +1,5 @@
+CREATE DATABASE vacantlotdb;
+\c vacantlotdb;
+CREATE EXTENSION postgis;
+CREATE EXTENSION pg_stat_statements;
+CREATE EXTENSION timescaledb;
diff --git a/docs/SETUP/BACK_END.md b/docs/SETUP/BACK_END.md
index 7906385f..7302b92f 100644
--- a/docs/SETUP/BACK_END.md
+++ b/docs/SETUP/BACK_END.md
@@ -118,6 +118,22 @@ To stop the postgres container run:
 docker compose down postgres
 ```
 
+### PostgreSQL Extensions
+
+We use Postgres extensions for GIS and time series functionality not included in base Postgres.
+
+#### PostGIS
+[PostGIS](https://postgis.net/) is an open-source extension for PostgreSQL that adds support for spatial and geographic data types and functions. It enables the storage, querying, and analysis of location-based data directly within the database, replacing the need for many external tools and libraries.
+
+#### TimescaleDB
+[TimescaleDB](https://docs.timescale.com/) is an open-source relational database built on PostgreSQL, optimized for handling time-series data efficiently.
+
+At the core of TimescaleDB are hypertables, which partition data across time for efficient querying. Hypertables behave like normal Postgres tables, but are optimized for querying data based on timestamps. For our use case, hypertables simplify data management by automatically creating monthly partitions, replacing our previous method of manually creating a separate schema for each month.
+
+#### pg_stat_statements
+
+The [pg_stat_statements](https://www.postgresql.org/docs/current/pgstatstatements.html) extension provides detailed statistics on query performance, helping to identify slow or resource-intensive queries. It tracks execution counts, execution times, and rows returned, making it a useful tool for analyzing problematic queries. The module must be listed in `shared_preload_libraries` (configured in `data/Dockerfile-timescale`) before its statistics can be queried.
+
 ## Python Development
 
 You can set up your local Python environment so you can develop and run the backend `script.py` and create and run unit tests outside of Docker. Build your local environment to match what is defined in the `Dockerfile`. Install the same python version as is in the Dockerfile, using `pyenv` to manage multiple distributions if needed. Use `pipenv` to create a virtual environment. Install the pip dependencies that are defined in the `Pipfile` into your virtual environment. Install the executables with `apt-get`. Now you can develop in Python in your terminal and IDE and run unit tests with `pytest`.