Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

30 create netcdf extractor with preview #31

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ jobs:
FOLDER: preview.geotiff
- name: pycsw
FOLDER: pycsw.extractor
- name: preview-netcdf
FOLDER: preview.netcdf
steps:
- uses: actions/checkout@v2

Expand Down
8 changes: 8 additions & 0 deletions preview.netcdf/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Base image: official Python 3.8.
FROM python:3.8

# Everything for the extractor lives in /extractor inside the image.
WORKDIR /extractor
# Copy and install the Python requirements before the sources, so the
# dependency layer does not have to be rebuilt when only the code changes.
COPY requirements.txt ./
RUN pip install -r requirements.txt

# Copy the plotting helpers, the extractor entry point and its metadata file.
COPY netcdfutils.py ncsa.geo.netcdf.extractor.py extractor_info.json ./
# Run the extractor when the container starts.
CMD python ncsa.geo.netcdf.extractor.py
41 changes: 41 additions & 0 deletions preview.netcdf/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Clowder Geo NetCDF Extractor

Overview

This extractor uses python NetCDF4 and matplotlib to plot data from
.nc and .nc4 files on a map.

NOTE - this is supposed to be a general purpose extractor that should work on
many files, but because NetCDF is a flexible file format, it is not guaranteed to work.
If the data is a time series, it will generate 4 previews spaced evenly throughout the time interval.


## Build a docker image
docker build -t clowder/extractors-geo-netcdf .

## Test the docker container image:
docker run --name=geo-netcdf -d --restart=always -e 'RABBITMQ_URI=amqp://user1:[email protected]:5672/clowder-dev' -e 'RABBITMQ_EXCHANGE=clowder' -e 'TZ=/usr/share/zoneinfo/US/Central' -e 'REGISTRATION_ENDPOINTS=http://dts-dev.ncsa.illinois.edu:9000/api/extractors?key=key1' clowder/extractors-geo-netcdf

## To run without docker


1. Setup a [virtualenv](https://virtualenv.pypa.io), e.g., named "geo-netcdf":

`virtualenv geo-netcdf`
2. Activate the virtualenv

`source geo-netcdf/bin/activate`
3. Install required python packages using *pip*

`pip install -r requirements.txt`
4. Install pyclowder if it is not installed yet.

`pip install git+https://opensource.ncsa.illinois.edu/stash/scm/cats/pyclowder.git`

or if you have pyclowder checked out as well (useful when developing)

`ln -s ../../pyClowder/pyclowder pyclowder`
5. Modify config.py
6. Start extractor

`./ncsa.geo.netcdf.extractor.py`
46 changes: 46 additions & 0 deletions preview.netcdf/extractor_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld",
"name": "ncsa.pdg.asjp.netcdf.preview",
"version": "1.0",
"description": "Maps of NetCDF Data",
"author": "Todd Nicholson <[email protected]>",
"contributors": [
"Luigi Marini <[email protected]>"
],
"contexts": [
{
}
],
"repository": [
{
"repType": "git",
"repUrl": "https://opensource.ncsa.illinois.edu/stash/scm/cats/extractors-geo.git"
},
{
"repType": "docker",
"repUrl": "clowder/extractors-geo-netcdf-preview"
}
],
"process": {
"file": [
"*/"
]
},
"external_services": [
"geoserver"
],
"dependencies": [],
"bibtex": [],
"parameters": {
"schema": {
"directory": {
"type": "string",
"title": "projection"
}
}
},
"labels": [
"Type/Image",
"Domain/Geo"
]
}
82 changes: 82 additions & 0 deletions preview.netcdf/ncsa.geo.netcdf.extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env python

"""Example extractor based on the clowder code."""

import logging
import subprocess
import json
import pyclowder
from pyclowder.extractors import Extractor
import pyclowder.files
import os
import netCDF4 as nc
import sys
from netCDF4 import Dataset as Dataset
import matplotlib.pyplot as plt
import numpy as np
import mpl_toolkits
import netcdfutils
from mpl_toolkits.basemap import Basemap
# Default (width, height) applied to every matplotlib figure created after this point.
plt.rcParams['figure.figsize'] = (16.0, 12.0)




class GeoNetCDF(Extractor):
    """Clowder extractor that uploads map previews rendered from a NetCDF file.

    Each incoming file is passed to ``netcdfutils.generate_maps_for_file``;
    every PNG it returns is uploaded to Clowder as an image preview and then
    removed from local disk.
    """

    def __init__(self):
        Extractor.__init__(self)

        # Extra command line arguments could be added to self.parser here,
        # before setup() parses them.

        # parse command line and load default logging configuration
        self.setup()

        # setup logging for the extractor
        logging.basicConfig(level=logging.INFO)
        logging.getLogger('pyclowder').setLevel(logging.DEBUG)
        logging.getLogger('__main__').setLevel(logging.DEBUG)

    @staticmethod
    def _preview_name(png_file, file_name):
        """Derive the visualization name from a generated PNG path.

        The source file name is removed from the PNG's base name, leading
        underscores are dropped, and a trailing ``.png`` extension is cut off.
        The extension is removed as a suffix; ``str.rstrip('.png')`` would
        also eat trailing 'p', 'n', 'g' and '.' characters of the name itself
        (e.g. "temp.png" -> "tem").
        """
        name = os.path.basename(png_file).replace(file_name, "").lstrip('_')
        if name.endswith('.png'):
            name = name[:-len('.png')]
        return name

    @staticmethod
    def _remove_leftover_pngs(logger):
        """Delete every non-hidden ``*.png`` in the working directory.

        Same effect as the shell command ``rm *.png`` but without spawning a
        shell, and with failures actually reported to the log.
        """
        logger.debug("Cleaning up all png files")
        try:
            candidates = os.listdir(os.curdir)
        except OSError as e:
            logger.debug("Error cleaning up files %s", e)
            return
        for entry in candidates:
            # A shell glob does not match dot-files, so skip them as well.
            if entry.startswith('.') or not entry.endswith('.png'):
                continue
            try:
                os.remove(entry)
            except OSError as e:
                logger.debug("Error cleaning up files %s", e)

    def process_message(self, connector, host, secret_key, resource, parameters, projection="Polar Stereographic'"):
        """Generate map previews for the file and upload them to Clowder.

        Args:
            connector: pyclowder connector used for status messages and uploads.
            host: Clowder host URL, forwarded to ``upload_preview``.
            secret_key: Clowder API key, forwarded to ``upload_preview``.
            resource: message resource; ``local_paths``, ``id`` and ``name``
                are read from it.
            parameters: message parameters (currently not used).
            projection: currently not used; the default value is kept
                verbatim so the signature stays unchanged.
        """
        logger = logging.getLogger(__name__)

        inputfile = resource["local_paths"][0]
        file_id = resource['id']
        file_name = resource['name']

        # These process messages will appear in the Clowder UI under Extractions.
        connector.message_process(resource, "Loading contents of file...")
        logger.debug("Preparing to generate plots")

        png_filepaths = netcdfutils.generate_maps_for_file(path_to_file=inputfile)
        try:
            for png_file in png_filepaths:
                variable_name = self._preview_name(png_file, file_name)
                preview_id = pyclowder.files.upload_preview(
                    connector, host, secret_key, file_id, png_file, None, "image/png",
                    visualization_name=variable_name,
                    visualization_description=variable_name,
                    visualization_component_id="basic-image-component")
                logger.debug("Uploaded preview %s for %s", preview_id, variable_name)
                try:
                    os.remove(png_file)
                except OSError as e:
                    logger.debug("Error removing %s", png_file)
                    logger.debug("%s", e)
        finally:
            # Runs even when an upload fails, so plots never pile up on disk.
            self._remove_leftover_pngs(logger)


if __name__ == "__main__":
    # Script entry point: build the extractor and start it.
    GeoNetCDF().start()
169 changes: 169 additions & 0 deletions preview.netcdf/netcdfutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import os
import netCDF4 as nc
import sys
from netCDF4 import Dataset as Dataset
import matplotlib.pyplot as plt
import numpy as np
import mpl_toolkits
from mpl_toolkits.basemap import Basemap
# Default (width, height) applied to every matplotlib figure created after this point.
plt.rcParams['figure.figsize'] = (16.0, 12.0)

# Sample NetCDF files used when running this module directly during development.
sample_file_1 = 'ASJP_Year_2023_Day_218.nc4'
sample_file_2 = 'soilw.mon.1991-2020.ltm.v2.nc'
sample_file_3 = 'air.2x2.250.mon.1991-2020.ltm.comb.nc'
sample_file_4 = 'air.mon.mean.nc'
sample_file_5 = 'adaptor.mars.internal-1696624738.5653653-18904-2-b0069ad2-7c40-4404-acd9-d7cf76870e2a.nc'
sample_file_6 = 'adaptor.mars.internal-1696625608.8176327-14431-17-11b1bdd3-05c6-42ee-b9d4-dee178830ba1.nc'
# Default input for a manual run: the first sample file in the working directory.
path_to_file = os.path.join(os.getcwd(), sample_file_1)

# Report whether the sample file is present only when run as a script, so that
# importing this module (as the extractor does) prints nothing.
if __name__ == "__main__":
    print(os.path.exists(path_to_file))
    print('exists?')



def _find_coordinate_name(variable_names, keyword, exact_names):
    """Return the name of the coordinate variable matching ``keyword``.

    A case-insensitive exact match against ``exact_names`` wins, so that
    helper variables such as ``lat_bnds`` cannot displace ``lat``. Otherwise
    the last variable whose lowercased name contains ``keyword`` is used.
    Returns an empty string when nothing matches.
    """
    fallback = ""
    for name in variable_names:
        lowered = str(name).lower()
        if lowered in exact_names:
            return name
        if keyword in lowered:
            fallback = name
    return fallback


def _render_map(lons, lats, data, projection, use_meshgrid, title, plot_name):
    """Draw one 2-D field on a world map, save it as ``plot_name`` and return that name."""
    basemap = Basemap(projection=projection, llcrnrlat=-80, urcrnrlat=80,
                      llcrnrlon=-180, urcrnrlon=180, lat_ts=20, resolution='c')
    # 1-dimensional lat/lon axes must be expanded to 2-dimensional grids
    # before they can be projected and handed to pcolor.
    if use_meshgrid:
        gridlons, gridlats = np.meshgrid(lons, lats)
        xi, yi = basemap(gridlons, gridlats)
    else:
        xi, yi = basemap(lons, lats)

    squeezed_data = np.squeeze(data)
    data_max = np.nanmax(squeezed_data)
    data_min = np.nanmin(squeezed_data)

    basemap.pcolor(xi, yi, squeezed_data)
    basemap.drawcoastlines()
    basemap.drawcountries()
    basemap.drawparallels(np.arange(-90., 91., 30.))
    basemap.drawmeridians(np.arange(-180., 181., 60.))
    cbar = basemap.colorbar()
    cbar.solids.set_edgecolor("face")
    # Only the extremes of the data are labelled on the colorbar.
    cbar.set_ticks([data_min, data_max])
    plt.title(title, fontdict={'fontsize': 26})

    plt.savefig(plot_name)
    # Clear the figure so the next plot starts from a blank canvas.
    plt.clf()
    return plot_name


def generate_maps_for_file(path_to_file, projection='merc'):
    """Render PNG map previews for the plottable variables of a NetCDF file.

    A variable is plotted when its shape contains every dimension size of the
    latitude and longitude variables. Variables with no further axis yield one
    plot; variables with exactly one further axis (treated as a time series)
    yield four plots at quarter intervals along that axis. Variables with more
    than one further axis are skipped.

    Args:
        path_to_file: path of the NetCDF file to read.
        projection: Basemap projection name used for every plot.

    Returns:
        List of PNG file names written to the current working directory.

    Raises:
        KeyError: if no latitude or longitude variable can be identified.
    """
    previews_returned = []

    # The context manager closes the dataset even if plotting fails.
    with Dataset(path_to_file) as dataset:
        variable_names = list(dataset.variables.keys())
        lat_name = _find_coordinate_name(variable_names, 'lat', ('lat', 'latitude'))
        lon_name = _find_coordinate_name(variable_names, 'lon', ('lon', 'longitude'))
        if not lat_name or not lon_name:
            raise KeyError(
                "could not find latitude/longitude variables in " + str(path_to_file))

        lats = dataset.variables[lat_name][:]
        lons = dataset.variables[lon_name][:]
        lat_lon_shape_values = list(lats.shape) + list(lons.shape)
        use_meshgrid = len(lats.shape) == 1 and len(lons.shape) == 1

        for variable in variable_names:
            if variable == lat_name or variable == lon_name:
                continue
            current_variable = dataset[variable]
            shape = current_variable.shape
            if not set(lat_lon_shape_values).issubset(shape):
                continue

            # Axes whose size matches neither lat nor lon (by size only; an
            # extra axis that happens to share a lat/lon size is not detected).
            extra_axes = [axis for axis, size in enumerate(shape)
                          if size not in lat_lon_shape_values]
            if len(extra_axes) > 1:
                continue

            # Fall back to the variable name when long_name is missing or empty.
            long_name = getattr(current_variable, 'long_name', None) or current_variable.name
            units = getattr(current_variable, 'units', None)
            title = long_name
            if units:
                title = title + '(' + str(units) + ')'
            # A path separator in the name would make savefig target a sub-directory.
            file_stem = long_name.replace(os.sep, '_')

            variable_data = current_variable[:]

            if not extra_axes:
                # Not time series data: a single map of the whole field.
                previews_returned.append(_render_map(
                    lons, lats, variable_data, projection, use_meshgrid,
                    title, file_stem + '.png'))
                continue

            # With time series data, show quarterly previews.
            time_axis = extra_axes[0]
            non_matching_shape_size = shape[time_axis]
            if non_matching_shape_size == 0:
                continue
            quarter_time = non_matching_shape_size // 4
            for i in range(4):
                # Slice along the detected axis instead of assuming it is axis 0.
                selector = [slice(None)] * len(shape)
                selector[time_axis] = i * quarter_time
                frame = variable_data[tuple(selector)]
                plot_name = file_stem + str(i) + '_' + str(non_matching_shape_size) + '.png'
                previews_returned.append(_render_map(
                    lons, lats, frame, projection, use_meshgrid, title, plot_name))

    return previews_returned

if __name__ == "__main__":
    # Manual run against the module-level default input path.
    generate_maps_for_file(path_to_file)
7 changes: 7 additions & 0 deletions preview.netcdf/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
gsconfig-py3==1.0.8
pika>=1.0.0
requests>=2.10.0
wheel>=0.24.0
pyclowder
netCDF4
matplotlib
2 changes: 1 addition & 1 deletion pycsw.extractor/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM ubuntu:20.04
MAINTAINER Yong Wook Kim <[email protected]>

ARG VERSION="unknown"
ARG BUILDNUMBER="unknown"
ARG GITSHA1="unknown"

# copy requirements file
Expand Down