diff --git a/.travis.yml b/.travis.yml index 6db595f8..6b061cd6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,3 +24,23 @@ branches: except: - stable - release-v2.0 + + +stages: + - Flake8 + - test + +jobs: + include: + - stage: Flake8 + env: Flake8=True + install: + - bash bin/travis-build.bash + - pip install flake8==3.5.0 + - pip install pycodestyle==2.3.0 + script: + - flake8 --version + # stop the build if there are Python syntax errors or undefined names + - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude ckan,ckanext-harvest + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + - flake8 . --count --max-line-length=127 --statistics --exclude ckan,ckanext-harvest diff --git a/bin/ckan_pycsw.py b/bin/ckan_pycsw.py index 471c71c6..80e61626 100644 --- a/bin/ckan_pycsw.py +++ b/bin/ckan_pycsw.py @@ -10,10 +10,15 @@ import pycsw.config import pycsw.admin +import os +import argparse +from ConfigParser import SafeConfigParser + logging.basicConfig(format='%(message)s', level=logging.INFO) log = logging.getLogger(__name__) + def setup_db(pycsw_config): """Setup database tables and indexes""" @@ -28,9 +33,9 @@ def setup_db(pycsw_config): ] pycsw.admin.setup_db(database, - table_name, '', - create_plpythonu_functions=False, - extra_columns=ckan_columns) + table_name, '', + create_plpythonu_functions=False, + extra_columns=ckan_columns) def set_keywords(pycsw_config_file, pycsw_config, ckan_url, limit=20): @@ -63,7 +68,8 @@ def load(pycsw_config, ckan_url): log.info('Started gathering CKAN datasets identifiers: {0}'.format(str(datetime.datetime.now()))) - query = 'api/search/dataset?qjson={"fl":"id,metadata_modified,extras_harvest_object_id,extras_metadata_source", "q":"harvest_object_id:[\\"\\" TO *]", "limit":1000, "start":%s}' + query = 'api/search/dataset?qjson={"fl":"id,metadata_modified,extras_harvest_object_id,' \ + 'extras_metadata_source", "q":"harvest_object_id:[\\"\\" TO *]", "limit":1000, "start":%s}' start = 0 @@ -75,7 +81,7 @@ def load(pycsw_config, ckan_url): response = requests.get(url) listing = response.json() if not isinstance(listing, dict): - raise RuntimeError, 'Wrong API response: %s' % listing + raise RuntimeError('Wrong API response: %s' % listing) results = listing.get('results') if not results: break @@ -111,8 +117,7 @@ def load(pycsw_config, ckan_url): for ckan_id in deleted: try: repo.session.begin() - repo.session.query(repo.dataset.ckan_id).filter_by( - ckan_id=ckan_id).delete() + repo.session.query(repo.dataset.ckan_id).filter_by(ckan_id=ckan_id).delete() log.info('Deleted %s' % ckan_id) repo.session.commit() except Exception, err: @@ -137,17 +142,16 @@ def load(pycsw_config, ckan_url): if not record: continue update_dict = dict([(getattr(repo.dataset, key), - getattr(record, key)) \ - for key in record.__dict__.keys() if key != '_sa_instance_state']) + getattr(record, key)) + for key in record.__dict__.keys() if key != '_sa_instance_state']) try: repo.session.begin() - repo.session.query(repo.dataset).filter_by( - ckan_id=ckan_id).update(update_dict) + repo.session.query(repo.dataset).filter_by(ckan_id=ckan_id).update(update_dict) repo.session.commit() log.info('Changed %s' % ckan_id) except Exception, err: repo.session.rollback() - raise RuntimeError, 'ERROR: %s' % str(err) + raise RuntimeError('ERROR: %s' % str(err)) def clear(pycsw_config): @@ -192,7 +196,7 @@ def get_record(context, repo, ckan_url, ckan_id, ckan_info): return record -usage=''' +usage = ''' Manages the CKAN-pycsw 
integration

    python ckan-pycsw.py setup [-p]
@@ -219,6 +223,7 @@ def get_record(context, repo, ckan_url, ckan_id, ckan_info):
 '''

+
 def _load_config(file_path):
     abs_path = os.path.abspath(file_path)
     if not os.path.exists(abs_path):
@@ -230,25 +235,18 @@ def _load_config(file_path):
     return config

-
-import os
-import argparse
-from ConfigParser import SafeConfigParser
-
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        description='\n'.split(usage)[0],
-        usage=usage)
-    parser.add_argument('command',
-                        help='Command to perform')
+    parser = argparse.ArgumentParser(description=usage.split('\n')[0],
+                                     usage=usage)
+    parser.add_argument('command', help='Command to perform')
     parser.add_argument('-p', '--pycsw_config',
-                        action='store', default='default.cfg',
-                        help='pycsw config file to use.')
+                        action='store', default='default.cfg',
+                        help='pycsw config file to use.')
     parser.add_argument('-u', '--ckan_url',
-                        action='store',
-                        help='CKAN instance to import the datasets from.')
+                        action='store',
+                        help='CKAN instance to import the datasets from.')

     if len(sys.argv) <= 1:
         parser.print_usage()
diff --git a/ckanext/spatial/commands/__init__.py b/ckanext/spatial/commands/__init__.py
index d2547b8d..267f7100 100644
--- a/ckanext/spatial/commands/__init__.py
+++ b/ckanext/spatial/commands/__init__.py
@@ -3,4 +3,4 @@
     pkg_resources.declare_namespace(__name__)
 except ImportError:
     import pkgutil
-    __path__ = pkgutil.extend_path(__path__, __name__)
\ No newline at end of file
+    __path__ = pkgutil.extend_path(__path__, __name__)
diff --git a/ckanext/spatial/commands/csw.py b/ckanext/spatial/commands/csw.py
index 88517a6b..f1899922 100644
--- a/ckanext/spatial/commands/csw.py
+++ b/ckanext/spatial/commands/csw.py
@@ -5,6 +5,8 @@
 from paste import script

 log = logging.getLogger(__name__)
+
+
 class Pycsw(script.command.Command):
     '''Manages the CKAN-pycsw integration

@@ -35,10 +37,14 @@ class Pycsw(script.command.Command):
     '''
     parser = script.command.Command.standard_parser(verbose=True)

-    parser.add_option('-p', '--pycsw-config', dest='pycsw_config',
-                      default='default.cfg', help='pycsw config file to use.')
-    parser.add_option('-u', '--ckan-url', dest='ckan_url',
-                      default='http://localhost', help='CKAN instance to import the datasets from.')
+    parser.add_option('-p', '--pycsw-config',
+                      dest='pycsw_config',
+                      default='default.cfg',
+                      help='pycsw config file to use.')
+    parser.add_option('-u', '--ckan-url',
+                      dest='ckan_url',
+                      default='http://localhost',
+                      help='CKAN instance to import the datasets from.')

     summary = __doc__.split('\n')[0]
     usage = __doc__
diff --git a/ckanext/spatial/commands/spatial.py b/ckanext/spatial/commands/spatial.py
index 8f75af6a..bf8dbe03 100644
--- a/ckanext/spatial/commands/spatial.py
+++ b/ckanext/spatial/commands/spatial.py
@@ -1,6 +1,4 @@
 import sys
-import re
-from pprint import pprint
 import logging

 from ckan.lib.cli import CkanCommand
@@ -8,6 +6,7 @@ from ckanext.spatial.lib import save_package_extent

 log = logging.getLogger(__name__)

+
 class Spatial(CkanCommand):
     '''Performs spatially related operations.

@@ -20,7 +19,7 @@ class Spatial(CkanCommand):
     spatial extents
         Creates or updates the extent geometry column for datasets with
         an extent defined in the 'spatial' extra.
-
+
     The commands should be run from the ckanext-spatial directory and expect
     a development.ini file to be present.
Most of the time you will specify the config explicitly though:: @@ -31,7 +30,7 @@ class Spatial(CkanCommand): summary = __doc__.split('\n')[0] usage = __doc__ - max_args = 2 + max_args = 2 min_args = 0 def command(self): @@ -43,7 +42,7 @@ def command(self): sys.exit(1) cmd = self.args[0] if cmd == 'initdb': - self.initdb() + self.initdb() elif cmd == 'extents': self.update_extents() else: @@ -56,16 +55,16 @@ def initdb(self): srid = None from ckanext.spatial.model import setup as db_setup - + db_setup(srid) print 'DB tables created' def update_extents(self): - from ckan.model import PackageExtra, Package, Session - conn = Session.connection() - packages = [extra.package \ - for extra in \ + from ckan.model import PackageExtra, Session + Session.connection() + packages = [extra.package + for extra in Session.query(PackageExtra).filter(PackageExtra.key == 'spatial').all()] errors = [] @@ -77,21 +76,19 @@ def update_extents(self): geometry = json.loads(value) count += 1 - except ValueError,e: - errors.append(u'Package %s - Error decoding JSON object: %s' % (package.id,str(e))) - except TypeError,e: - errors.append(u'Package %s - Error decoding JSON object: %s' % (package.id,str(e))) + except ValueError, e: + errors.append(u'Package %s - Error decoding JSON object: %s' % (package.id, str(e))) + except TypeError, e: + errors.append(u'Package %s - Error decoding JSON object: %s' % (package.id, str(e))) - save_package_extent(package.id,geometry) - + save_package_extent(package.id, geometry) Session.commit() - + if errors: msg = 'Errors were found:\n%s' % '\n'.join(errors) print msg - msg = "Done. Extents generated for %i out of %i packages" % (count,len(packages)) + msg = "Done. Extents generated for %i out of %i packages" % (count, len(packages)) print msg - diff --git a/ckanext/spatial/commands/validation.py b/ckanext/spatial/commands/validation.py index b2619673..ecd809ca 100644 --- a/ckanext/spatial/commands/validation.py +++ b/ckanext/spatial/commands/validation.py @@ -1,5 +1,4 @@ import sys -import re import os from pprint import pprint import logging @@ -10,6 +9,7 @@ log = logging.getLogger(__name__) + class Validation(CkanCommand): '''Validation commands @@ -21,7 +21,7 @@ class Validation(CkanCommand): validation report-csv .csv Performs validation on all the harvested metadata in the db and writes a report in CSV format to the given filepath. - + validation file .xml Performs validation on the given metadata file. 
 '''
@@ -49,7 +49,6 @@ def command(self):

     def report(self):
         from ckan import model
-        from ckanext.harvest.model import HarvestObject
         from ckanext.spatial.lib.reports import validation_report

         if len(self.args) >= 2:
@@ -92,7 +91,7 @@ def validate_file(self):
             print 'ERROR: Unicode Error reading file \'%s\': %s' % \
                 (metadata_filepath, e)
             sys.exit(1)
-        #import pdb; pdb.set_trace()
+        # import pdb; pdb.set_trace()
         xml = etree.fromstring(xml_string)

         # XML validation
@@ -102,11 +101,11 @@ def validate_file(self):
         if valid:
             try:
                 iso_document = ISODocument(xml_string)
-                iso_values = iso_document.read_values()
+                iso_document.read_values()
             except Exception, e:
                 valid = False
                 errors.append('CKAN exception reading values from ISODocument: %s' % e)
-
+
         print '***************'
         print 'Summary'
         print '***************'
diff --git a/ckanext/spatial/controllers/__init__.py b/ckanext/spatial/controllers/__init__.py
index d0ed2fca..267f7100 100644
--- a/ckanext/spatial/controllers/__init__.py
+++ b/ckanext/spatial/controllers/__init__.py
@@ -4,4 +4,3 @@
 except ImportError:
     import pkgutil
     __path__ = pkgutil.extend_path(__path__, __name__)
-
diff --git a/ckanext/spatial/controllers/api.py b/ckanext/spatial/controllers/api.py
index b0cb961b..b4daf6cd 100644
--- a/ckanext/spatial/controllers/api.py
+++ b/ckanext/spatial/controllers/api.py
@@ -26,7 +26,7 @@ def spatial_query(self):
         error_400_msg = \
             'Please provide a suitable bbox parameter [minx,miny,maxx,maxy]'

-        if not 'bbox' in request.params:
+        if 'bbox' not in request.params:
             abort(400, error_400_msg)

         bbox = validate_bbox(request.params['bbox'])
@@ -127,7 +127,7 @@ def display_xml_original(self, id):
         response.headers['Content-Type'] = 'application/xml; charset=utf-8'
         response.headers['Content-Length'] = len(content)

-        if not '<?xml' in content.split('\n')[0]:
+        if '<?xml' not in content.split('\n')[0]:
             content = u'<?xml version="1.0" encoding="UTF-8"?>\n' + content
         return content.encode('utf-8')
diff --git a/ckanext/spatial/geoalchemy_common.py b/ckanext/spatial/geoalchemy_common.py
index 308455de..fed30449 100644
--- a/ckanext/spatial/geoalchemy_common.py
+++ b/ckanext/spatial/geoalchemy_common.py
@@ -13,7 +13,7 @@ if toolkit.check_ckan_version(min_version='2.3'):
     # CKAN >= 2.3, use GeoAlchemy2

-    from geoalchemy2.elements import WKTElement
+    from geoalchemy2.elements import WKTElement  # noqa
     from geoalchemy2 import Geometry
     from sqlalchemy import func

     ST_Transform = func.ST_Transform
@@ -23,7 +23,7 @@ else:
     # CKAN < 2.3, use GeoAlchemy

-    from geoalchemy import WKTSpatialElement as WKTElement
+    from geoalchemy import WKTSpatialElement as WKTElement  # noqa
     from geoalchemy import functions

     ST_Transform = functions.transform
     ST_Equals = functions.equals
diff --git a/ckanext/spatial/harvesters/__init__.py b/ckanext/spatial/harvesters/__init__.py
index 0093d426..4f129fb5 100644
--- a/ckanext/spatial/harvesters/__init__.py
+++ b/ckanext/spatial/harvesters/__init__.py
@@ -9,3 +9,5 @@
 from ckanext.spatial.harvesters.csw import CSWHarvester
 from ckanext.spatial.harvesters.waf import WAFHarvester
 from ckanext.spatial.harvesters.doc import DocHarvester
+
+__all__ = ['CSWHarvester', 'WAFHarvester', 'DocHarvester']
diff --git a/ckanext/spatial/harvesters/base.py b/ckanext/spatial/harvesters/base.py
index 4bac371e..ec112f66 100644
--- a/ckanext/spatial/harvesters/base.py
+++ b/ckanext/spatial/harvesters/base.py
@@ -92,7 +92,7 @@ def guess_resource_format(url, use_mimetypes=True):
         return resource_type

     file_types = {
-        'kml' : ('kml',),
+        'kml': ('kml',),
         'kmz': ('kmz',),
         'gml': ('gml',),
     }
@@ -122,7 +122,7 @@ class SpatialHarvester(HarvesterBase):
    {"type": "Polygon", "coordinates": [[[$xmin, $ymin],
[$xmax, $ymin], [$xmax, $ymax], [$xmin, $ymax], [$xmin, $ymin]]]} ''') - ## IHarvester + # IHarvester def validate_config(self, source_config): if not source_config: @@ -143,16 +143,16 @@ def validate_config(self, source_config): raise ValueError('Unknown validation profile(s): %s' % ','.join(unknown_profiles)) if 'default_tags' in source_config_obj: - if not isinstance(source_config_obj['default_tags'],list): + if not isinstance(source_config_obj['default_tags'], list): raise ValueError('default_tags must be a list') if 'default_extras' in source_config_obj: - if not isinstance(source_config_obj['default_extras'],dict): + if not isinstance(source_config_obj['default_extras'], dict): raise ValueError('default_extras must be a dictionary') for key in ('override_extras', 'clean_tags'): if key in source_config_obj: - if not isinstance(source_config_obj[key],bool): + if not isinstance(source_config_obj[key], bool): raise ValueError('%s must be boolean' % key) except ValueError, e: @@ -160,10 +160,7 @@ def validate_config(self, source_config): return source_config - ## - - ## SpatialHarvester - + # SpatialHarvester def get_package_dict(self, iso_values, harvest_object): ''' @@ -203,7 +200,7 @@ def get_package_dict(self, context, data_dict): :returns: A dataset dictionary (package_dict) :rtype: dict ''' - + tags = [] if 'tags' in iso_values: @@ -237,7 +234,8 @@ def get_package_dict(self, context, data_dict): if not name: name = self._gen_new_name(str(iso_values['guid'])) if not name: - raise Exception('Could not generate a unique name from the title or the GUID. Please choose a more unique title.') + raise Exception('Could not generate a unique name from the title or the GUID. ' + 'Please choose a more unique title.') package_dict['name'] = name else: package_dict['name'] = package.name @@ -287,7 +285,6 @@ def _extract_first_license_url(licences): if license_url_extracted: extras['licence_url'] = license_url_extracted - # Metadata license ID check for package use_constraints = iso_values.get('use-constraints') if use_constraints: @@ -307,7 +304,6 @@ def _extract_first_license_url(licences): package_dict['license_id'] = package_license break - extras['access_constraints'] = iso_values.get('limitations-on-public-access', '') # Grpahic preview @@ -320,7 +316,6 @@ def _extract_first_license_url(licences): if browse_graphic.get('type'): extras['graphic-preview-type'] = browse_graphic.get('type') - for key in ['temporal-extent-begin', 'temporal-extent-end']: if len(iso_values[key]) > 0: extras[key] = iso_values[key][0] @@ -350,7 +345,7 @@ def _extract_first_license_url(licences): ymax = float(bbox['north']) except ValueError, e: self._save_object_error('Error parsing bounding box value: {0}'.format(str(e)), - harvest_object, 'Import') + harvest_object, 'Import') else: # Construct a GeoJSON extent so ckanext-spatial can register the extent geometry @@ -361,7 +356,7 @@ def _extract_first_license_url(licences): x=xmin, y=ymin ) self._save_object_error('Point extent defined instead of polygon', - harvest_object, 'Import') + harvest_object, 'Import') else: extent_string = self.extent_template.substitute( xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax @@ -391,28 +386,27 @@ def _extract_first_license_url(licences): { 'url': url, 'name': resource_locator.get('name') or p.toolkit._('Unnamed resource'), - 'description': resource_locator.get('description') or '', + 'description': resource_locator.get('description') or '', 'resource_locator_protocol': resource_locator.get('protocol') or '', 
'resource_locator_function': resource_locator.get('function') or '', }) package_dict['resources'].append(resource) - # Add default_extras from config - default_extras = self.source_config.get('default_extras',{}) + default_extras = self.source_config.get('default_extras', {}) if default_extras: - override_extras = self.source_config.get('override_extras',False) - for key,value in default_extras.iteritems(): - log.debug('Processing extra %s', key) - if not key in extras or override_extras: - # Look for replacement strings - if isinstance(value,basestring): - value = value.format(harvest_source_id=harvest_object.job.source.id, - harvest_source_url=harvest_object.job.source.url.strip('/'), - harvest_source_title=harvest_object.job.source.title, - harvest_job_id=harvest_object.job.id, - harvest_object_id=harvest_object.id) - extras[key] = value + override_extras = self.source_config.get('override_extras', False) + for key, value in default_extras.iteritems(): + log.debug('Processing extra %s', key) + if key not in extras or override_extras: + # Look for replacement strings + if isinstance(value, basestring): + value = value.format(harvest_source_id=harvest_object.job.source.id, + harvest_source_url=harvest_object.job.source.url.strip('/'), + harvest_source_title=harvest_object.job.source.title, + harvest_job_id=harvest_object.job.id, + harvest_object_id=harvest_object.id) + extras[key] = value extras_as_dict = [] for key, value in extras.iteritems(): @@ -456,9 +450,8 @@ def import_stage(self, harvest_object): # Get the last harvested object (if any) previous_object = model.Session.query(HarvestObject) \ - .filter(HarvestObject.guid==harvest_object.guid) \ - .filter(HarvestObject.current==True) \ - .first() + .filter(HarvestObject.guid == harvest_object.guid) \ + .filter(HarvestObject.current == True).first() # noqa if status == 'delete': # Delete package @@ -474,7 +467,7 @@ def import_stage(self, harvest_object): original_document = self._get_object_extra(harvest_object, 'original_document') original_format = self._get_object_extra(harvest_object, 'original_format') if original_document and original_format: - #DEPRECATED use the ISpatialHarvester interface method + # DEPRECATED use the ISpatialHarvester interface method self.__base_transform_to_iso_called = False content = self.transform_to_iso(original_document, original_format, harvest_object) if not self.__base_transform_to_iso_called: @@ -499,7 +492,8 @@ def import_stage(self, harvest_object): if not is_valid: # If validation errors were found, import will stop unless # configuration per source or per instance says otherwise - continue_import = p.toolkit.asbool(config.get('ckanext.spatial.harvest.continue_on_validation_errors', False)) or \ + continue_import = p.toolkit.asbool(config.get('ckanext.spatial.harvest.continue_on_validation_errors', + False)) or \ self.source_config.get('continue_on_validation_errors') if not continue_import: return False @@ -525,12 +519,11 @@ def import_stage(self, harvest_object): # First make sure there already aren't current objects # with the same guid existing_object = model.Session.query(HarvestObject.id) \ - .filter(HarvestObject.guid==iso_guid) \ - .filter(HarvestObject.current==True) \ - .first() + .filter(HarvestObject.guid == iso_guid) \ + .filter(HarvestObject.current == True).first() # noqa if existing_object: self._save_object_error('Object {0} already has this guid {1}'.format(existing_object.id, iso_guid), - harvest_object, 'Import') + harvest_object, 'Import') return False harvest_object.guid = 
iso_guid @@ -548,13 +541,12 @@ def import_stage(self, harvest_object): metadata_modified_date = dateutil.parser.parse(iso_values['metadata-date'], ignoretz=True) except ValueError: self._save_object_error('Could not extract reference date for object {0} ({1})' - .format(harvest_object.id, iso_values['metadata-date']), harvest_object, 'Import') + .format(harvest_object.id, iso_values['metadata-date']), harvest_object, 'Import') return False harvest_object.metadata_modified_date = metadata_modified_date harvest_object.add() - # Build the package dict package_dict = self.get_package_dict(iso_values, harvest_object) for harvester in p.PluginImplementations(ISpatialHarvester): @@ -577,7 +569,6 @@ def import_stage(self, harvest_object): if self._site_user and context['user'] == self._site_user['name']: context['ignore_auth'] = True - # The default package schema does not like Upper case tags tag_schema = logic.schema.default_tags_schema() tag_schema['name'] = [not_empty, unicode] @@ -615,7 +606,8 @@ def import_stage(self, harvest_object): elif status == 'change': # Check if the modified date is more recent - if not self.force_import and previous_object and harvest_object.metadata_modified_date <= previous_object.metadata_modified_date: + if not self.force_import and previous_object \ + and harvest_object.metadata_modified_date <= previous_object.metadata_modified_date: # Assign the previous job id to the new object to # avoid losing history @@ -628,21 +620,21 @@ def import_stage(self, harvest_object): # Reindex the corresponding package to update the reference to the # harvest object if ((config.get('ckanext.spatial.harvest.reindex_unchanged', True) != 'False' - or self.source_config.get('reindex_unchanged') != 'False') - and harvest_object.package_id): + or self.source_config.get('reindex_unchanged') != 'False') + and harvest_object.package_id): context.update({'validate': False, 'ignore_auth': True}) try: package_dict = logic.get_action('package_show')(context, - {'id': harvest_object.package_id}) + {'id': harvest_object.package_id}) except p.toolkit.ObjectNotFound: pass - else: - for extra in package_dict.get('extras', []): - if extra['key'] == 'harvest_object_id': - extra['value'] = harvest_object.id - if package_dict: - package_index = PackageSearchIndex() - package_index.index_package(package_dict) + else: + for extra in package_dict.get('extras', []): + if extra['key'] == 'harvest_object_id': + extra['value'] = harvest_object.id + if package_dict: + package_index = PackageSearchIndex() + package_index.index_package(package_dict) log.info('Document with GUID %s unchanged, skipping...' 
% (harvest_object.guid)) else: @@ -729,7 +721,6 @@ def _get_validator(self): if custom_validator not in all_validators: self._validator.add_validator(custom_validator) - return self._validator def _get_user_name(self): @@ -750,8 +741,8 @@ def _get_user_name(self): context = {'model': model, 'ignore_auth': True, - 'defer_commit': True, # See ckan/ckan#1714 - } + 'defer_commit': True, # See ckan/ckan#1714 + } self._site_user = p.toolkit.get_action('get_site_user')(context, {}) config_user_name = config.get('ckanext.spatial.harvest.user_name') @@ -824,7 +815,8 @@ def _validate_document(self, document_string, harvest_object, validator=None): valid, profile, errors = validator.is_valid(xml) if not valid: - log.error('Validation errors found using profile {0} for object with GUID {1}'.format(profile, harvest_object.guid)) + log.error('Validation errors found using profile {0} for object with GUID {1}' + .format(profile, harvest_object.guid)) for error in errors: self._save_object_error(error[0], harvest_object, 'Validation', line=error[1]) diff --git a/ckanext/spatial/harvesters/csw.py b/ckanext/spatial/harvesters/csw.py index 2853a10c..f82f521c 100644 --- a/ckanext/spatial/harvesters/csw.py +++ b/ckanext/spatial/harvesters/csw.py @@ -22,7 +22,7 @@ class CSWHarvester(SpatialHarvester, SingletonPlugin): ''' implements(IHarvester) - csw=None + csw = None def info(self): return { @@ -31,10 +31,9 @@ def info(self): 'description': 'A server that implements OGC\'s Catalog Service for the Web (CSW) standard' } - def get_original_url(self, harvest_object_id): obj = model.Session.query(HarvestObject).\ - filter(HarvestObject.id==harvest_object_id).\ + filter(HarvestObject.id == harvest_object_id).\ first() parts = urlparse.urlparse(obj.source.url) @@ -44,7 +43,7 @@ def get_original_url(self, harvest_object_id): 'VERSION': '2.0.2', 'REQUEST': 'GetRecordById', 'OUTPUTSCHEMA': 'http://www.isotc211.org/2005/gmd', - 'OUTPUTFORMAT':'application/xml' , + 'OUTPUTFORMAT': 'application/xml', 'ID': obj.guid } @@ -76,9 +75,10 @@ def gather_stage(self, harvest_job): self._save_gather_error('Error contacting the CSW server: %s' % e, harvest_job) return None - query = model.Session.query(HarvestObject.guid, HarvestObject.package_id).\ - filter(HarvestObject.current==True).\ - filter(HarvestObject.harvest_source_id==harvest_job.source.id) + query = model.Session.query(HarvestObject.guid, HarvestObject.package_id). 
\ + filter(HarvestObject.harvest_source_id == harvest_job.source.id).\ + filter(HarvestObject.current == True) # noqa + guid_to_package_id = {} for guid, package_id in query: @@ -101,10 +101,9 @@ def gather_stage(self, harvest_job): guids_in_harvest.add(identifier) except Exception, e: - self._save_gather_error('Error for the identifier %s [%r]' % (identifier,e), harvest_job) + self._save_gather_error('Error for the identifier %s [%r]' % (identifier, e), harvest_job) continue - except Exception, e: log.error('Exception: %s' % text_traceback()) self._save_gather_error('Error gathering the identifiers from the CSW server [%s]' % str(e), harvest_job) @@ -131,8 +130,8 @@ def gather_stage(self, harvest_job): package_id=guid_to_package_id[guid], extras=[HOExtra(key='status', value='delete')]) model.Session.query(HarvestObject).\ - filter_by(guid=guid).\ - update({'current': False}, False) + filter_by(guid=guid).\ + update({'current': False}, False) obj.save() ids.append(obj.id) @@ -142,7 +141,7 @@ def gather_stage(self, harvest_job): return ids - def fetch_stage(self,harvest_object): + def fetch_stage(self, harvest_object): # Check harvest object status status = self._get_object_extra(harvest_object, 'status') @@ -182,8 +181,8 @@ def fetch_stage(self,harvest_object): harvest_object.content = content.strip() harvest_object.save() - except Exception,e: - self._save_object_error('Error saving the harvest object for GUID %s [%r]' % \ + except Exception, e: + self._save_object_error('Error saving the harvest object for GUID %s [%r]' % (identifier, e), harvest_object) return False @@ -192,4 +191,3 @@ def fetch_stage(self,harvest_object): def _setup_csw_client(self, url): self.csw = CswService(url) - diff --git a/ckanext/spatial/harvesters/doc.py b/ckanext/spatial/harvesters/doc.py index e8a6daae..1b935343 100644 --- a/ckanext/spatial/harvesters/doc.py +++ b/ckanext/spatial/harvesters/doc.py @@ -27,18 +27,16 @@ def info(self): 'description': 'A single spatial metadata document' } - def get_original_url(self, harvest_object_id): obj = model.Session.query(HarvestObject).\ - filter(HarvestObject.id==harvest_object_id).\ + filter(HarvestObject.id == harvest_object_id).\ first() if not obj: return None return obj.source.url - - def gather_stage(self,harvest_job): + def gather_stage(self, harvest_job): log = logging.getLogger(__name__ + '.individual.gather') log.debug('DocHarvester gather_stage for job: %r', harvest_job) @@ -52,34 +50,31 @@ def gather_stage(self,harvest_job): # Get contents try: content = self._get_content_as_unicode(url) - except Exception,e: - self._save_gather_error('Unable to get content for URL: %s: %r' % \ - (url, e),harvest_job) + except Exception, e: + self._save_gather_error('Unable to get content for URL: %s: %r' % + (url, e), harvest_job) return None - existing_object = model.Session.query(HarvestObject.guid, HarvestObject.package_id).\ - filter(HarvestObject.current==True).\ - filter(HarvestObject.harvest_source_id==harvest_job.source.id).\ - first() + existing_object = model.Session.query(HarvestObject.guid, HarvestObject.package_id). \ + filter(HarvestObject.harvest_source_id == harvest_job.source.id). 
\ + filter(HarvestObject.current == True).first() # noqa def create_extras(url, status): return [HOExtra(key='doc_location', value=url), HOExtra(key='status', value=status)] if not existing_object: - guid=hashlib.md5(url.encode('utf8', 'ignore')).hexdigest() + guid = hashlib.md5(url.encode('utf8', 'ignore')).hexdigest() harvest_object = HarvestObject(job=harvest_job, - extras=create_extras(url, - 'new'), - guid=guid - ) + extras=create_extras(url, 'new'), + guid=guid + ) else: harvest_object = HarvestObject(job=harvest_job, - extras=create_extras(url, - 'change'), - guid=existing_object.guid, - package_id=existing_object.package_id - ) + extras=create_extras(url, 'change'), + guid=existing_object.guid, + package_id=existing_object.package_id + ) harvest_object.add() @@ -104,10 +99,6 @@ def create_extras(url, status): return [harvest_object.id] - - - - def fetch_stage(self,harvest_object): + def fetch_stage(self, harvest_object): # The fetching was already done in the previous stage return True - diff --git a/ckanext/spatial/harvesters/gemini.py b/ckanext/spatial/harvesters/gemini.py index 8dc65d45..66f278cb 100644 --- a/ckanext/spatial/harvesters/gemini.py +++ b/ckanext/spatial/harvesters/gemini.py @@ -52,7 +52,6 @@ class GeminiHarvester(SpatialHarvester): All three harvesters share the same import stage ''' - def import_stage(self, harvest_object): log = logging.getLogger(__name__ + '.import') log.debug('Import stage for harvest object: %r', harvest_object) @@ -65,7 +64,7 @@ def import_stage(self, harvest_object): self.obj = harvest_object if harvest_object.content is None: - self._save_object_error('Empty content for object %s' % harvest_object.id,harvest_object,'Import') + self._save_object_error('Empty content for object %s' % harvest_object.id, harvest_object, 'Import') return False try: self.import_gemini_object(harvest_object.content) @@ -95,13 +94,12 @@ def import_gemini_object(self, gemini_string): if not valid: out = errors[0][0] + ':\n' + '\n'.join(e[0] for e in errors[1:]) log.error('Errors found for object with GUID %s:' % self.obj.guid) - self._save_object_error(out,self.obj,'Import') + self._save_object_error(out, self.obj, 'Import') unicode_gemini_string = etree.tostring(xml, encoding=unicode, pretty_print=True) # may raise Exception for errors - package_dict = self.write_package_from_gemini_string(unicode_gemini_string) - + self.write_package_from_gemini_string(unicode_gemini_string) def write_package_from_gemini_string(self, content): '''Create or update a Package based on some content that has @@ -118,26 +116,25 @@ def write_package_from_gemini_string(self, content): # Save the metadata reference date in the Harvest Object try: - metadata_modified_date = datetime.strptime(gemini_values['metadata-date'],'%Y-%m-%d') + metadata_modified_date = datetime.strptime(gemini_values['metadata-date'], '%Y-%m-%d') except ValueError: try: - metadata_modified_date = datetime.strptime(gemini_values['metadata-date'],'%Y-%m-%dT%H:%M:%S') - except: - raise Exception('Could not extract reference date for GUID %s (%s)' \ - % (gemini_guid,gemini_values['metadata-date'])) + metadata_modified_date = datetime.strptime(gemini_values['metadata-date'], '%Y-%m-%dT%H:%M:%S') + except Exception: + raise Exception('Could not extract reference date for GUID %s (%s)' % + (gemini_guid, gemini_values['metadata-date'])) self.obj.metadata_modified_date = metadata_modified_date self.obj.save() last_harvested_object = Session.query(HarvestObject) \ - .filter(HarvestObject.guid==gemini_guid) \ - 
.filter(HarvestObject.current==True) \ - .all() + .filter(HarvestObject.guid == gemini_guid) \ + .filter(HarvestObject.current == True).all() # noqa if len(last_harvested_object) == 1: last_harvested_object = last_harvested_object[0] elif len(last_harvested_object) > 1: - raise Exception('Application Error: more than one current record for GUID %s' % gemini_guid) + raise Exception('Application Error: more than one current record for GUID %s' % gemini_guid) reactivate_package = False if last_harvested_object: @@ -152,7 +149,7 @@ def write_package_from_gemini_string(self, content): last_harvested_object.source.active is False): if self.force_import: - log.info('Import forced for object %s with GUID %s' % (self.obj.id,gemini_guid)) + log.info('Import forced for object %s with GUID %s' % (self.obj.id, gemini_guid)) else: log.info('Package for object with GUID %s needs to be created or updated' % gemini_guid) @@ -165,8 +162,9 @@ def write_package_from_gemini_string(self, content): log.info('Package for object with GUID %s will be re-activated' % gemini_guid) reactivate_package = True else: - log.info('Remote record with GUID %s is not more recent than a deleted package, skipping... ' % gemini_guid) - return None + log.info('Remote record with GUID %s is not more recent than a deleted package, skipping... ' + % gemini_guid) + return None else: if last_harvested_object.content != self.obj.content and \ @@ -175,7 +173,8 @@ def write_package_from_gemini_string(self, content): last_harvested_object.content.split('\n'), self.obj.content.split('\n')) diff = '\n'.join([line for line in diff_generator]) - raise Exception('The contents of document with GUID %s changed, but the metadata date has not been updated.\nDiff:\n%s' % (gemini_guid, diff)) + raise Exception('The contents of document with GUID %s changed, ' + 'but the metadata date has not been updated.\nDiff:\n%s' % (gemini_guid, diff)) else: # The content hasn't changed, no need to update the package log.info('Document with GUID %s unchanged, skipping...' 
                          % (gemini_guid))
@@ -195,8 +194,8 @@ def write_package_from_gemini_string(self, content):
             'guid',  # Usefuls
             'dataset-reference-date',
-            'metadata-language', # Language
-            'metadata-date', # Released
+            'metadata-language',  # Language
+            'metadata-date',  # Released
             'coupled-resource',
             'contact-email',
             'frequency-of-update',
@@ -222,12 +221,12 @@ def write_package_from_gemini_string(self, content):
         if licence_url_extracted:
             extras['licence_url'] = licence_url_extracted

-        extras['access_constraints'] = gemini_values.get('limitations-on-public-access','')
-        if gemini_values.has_key('temporal-extent-begin'):
-            #gemini_values['temporal-extent-begin'].sort()
+        extras['access_constraints'] = gemini_values.get('limitations-on-public-access', '')
+        if 'temporal-extent-begin' in gemini_values:
+            # gemini_values['temporal-extent-begin'].sort()
             extras['temporal_coverage-from'] = gemini_values['temporal-extent-begin']
-        if gemini_values.has_key('temporal-extent-end'):
-            #gemini_values['temporal-extent-end'].sort()
+        if 'temporal-extent-end' in gemini_values:
+            # gemini_values['temporal-extent-end'].sort()
             extras['temporal_coverage-to'] = gemini_values['temporal-extent-end']

         # Save responsible organization roles
@@ -236,7 +235,7 @@ def write_package_from_gemini_string(self, content):
             extras['provider'] = provider
         extras['responsible-party'] = '; '.join(responsible_parties)

-        if len(gemini_values['bbox']) >0:
+        if len(gemini_values['bbox']) > 0:
             extras['bbox-east-long'] = gemini_values['bbox'][0]['east']
             extras['bbox-north-lat'] = gemini_values['bbox'][0]['north']
             extras['bbox-south-lat'] = gemini_values['bbox'][0]['south']
@@ -244,10 +243,10 @@ def write_package_from_gemini_string(self, content):

             # Construct a GeoJSON extent so ckanext-spatial can register the extent geometry
             extent_string = self.extent_template.substitute(
-                    xmin = extras['bbox-east-long'],
-                    ymin = extras['bbox-south-lat'],
-                    xmax = extras['bbox-west-long'],
-                    ymax = extras['bbox-north-lat']
+                xmin=extras['bbox-west-long'],
+                ymin=extras['bbox-south-lat'],
+                xmax=extras['bbox-east-long'],
+                ymax=extras['bbox-north-lat']
             )
             extras['spatial'] = extent_string.strip()
@@ -255,18 +254,17 @@ def write_package_from_gemini_string(self, content):
         tags = []
         for tag in gemini_values['tags']:
             tag = tag[:50] if len(tag) > 50 else tag
-            tags.append({'name':tag})
+            tags.append({'name': tag})

         package_dict = {
             'title': gemini_values['title'],
             'notes': gemini_values['abstract'],
             'tags': tags,
-            'resources':[]
+            'resources': []
         }

         if self.obj.source.publisher_id:
-            package_dict['groups'] = [{'id':self.obj.source.publisher_id}]
-
+            package_dict['groups'] = [{'id': self.obj.source.publisher_id}]

         if reactivate_package:
             package_dict['state'] = u'active'
@@ -276,7 +274,8 @@ def write_package_from_gemini_string(self, content):
             if not name:
                 name = self.gen_new_name(str(gemini_guid))
             if not name:
-                raise Exception('Could not generate a unique name from the title or the GUID. Please choose a more unique title.')
+                raise Exception('Could not generate a unique name from the title or the GUID. 
' + 'Please choose a more unique title.') package_dict['name'] = name else: package_dict['name'] = package.name @@ -285,7 +284,7 @@ def write_package_from_gemini_string(self, content): if len(resource_locators): for resource_locator in resource_locators: - url = resource_locator.get('url','') + url = resource_locator.get('url', '') if url: resource_format = '' resource = {} @@ -299,12 +298,12 @@ def write_package_from_gemini_string(self, content): resource.update( { 'url': url, - 'name': resource_locator.get('name',''), - 'description': resource_locator.get('description') if resource_locator.get('description') else 'Resource locator', + 'name': resource_locator.get('name', ''), + 'description': resource_locator.get('description') if resource_locator.get('description') + else 'Resource locator', 'format': resource_format or None, - 'resource_locator_protocol': resource_locator.get('protocol',''), - 'resource_locator_function':resource_locator.get('function','') - + 'resource_locator_protocol': resource_locator.get('protocol', ''), + 'resource_locator_function': resource_locator.get('function', '') }) package_dict['resources'].append(resource) @@ -318,28 +317,28 @@ def write_package_from_gemini_string(self, content): view_resources[0]['ckan_recommended_wms_preview'] = True extras_as_dict = [] - for key,value in extras.iteritems(): - if isinstance(value,(basestring,Number)): - extras_as_dict.append({'key':key,'value':value}) + for key, value in extras.iteritems(): + if isinstance(value, (basestring, Number)): + extras_as_dict.append({'key': key, 'value': value}) else: - extras_as_dict.append({'key':key,'value':json.dumps(value)}) + extras_as_dict.append({'key': key, 'value': json.dumps(value)}) package_dict['extras'] = extras_as_dict - if package == None: + if package is None: # Create new package from data. 
package = self._create_package_from_data(package_dict) log.info('Created new package ID %s with GEMINI guid %s', package['id'], gemini_guid) else: - package = self._create_package_from_data(package_dict, package = package) + package = self._create_package_from_data(package_dict, package=package) log.info('Updated existing package ID %s with existing GEMINI guid %s', package['id'], gemini_guid) # Flag the other objects of this source as not current anymore from ckanext.harvest.model import harvest_object_table u = update(harvest_object_table) \ - .where(harvest_object_table.c.package_id==bindparam('b_package_id')) \ - .values(current=False) - Session.execute(u, params={'b_package_id':package['id']}) + .where(harvest_object_table.c.package_id == bindparam('b_package_id')) \ + .values(current=False) + Session.execute(u, params={'b_package_id': package['id']}) Session.commit() # Refresh current object from session, otherwise the @@ -427,7 +426,7 @@ def _extract_first_licence_url(self, licences): return licence return None - def _create_package_from_data(self, package_dict, package = None): + def _create_package_from_data(self, package_dict, package=None): ''' {'name': 'council-owned-litter-bins', 'notes': 'Location of Council owned litter bins within Borough.', @@ -453,15 +452,15 @@ def _create_package_from_data(self, package_dict, package = None): # The default package schema does not like Upper case tags tag_schema = logic.schema.default_tags_schema() - tag_schema['name'] = [not_empty,unicode] + tag_schema['name'] = [not_empty, unicode] package_schema['tags'] = tag_schema # TODO: user - context = {'model':model, - 'session':Session, - 'user':'harvest', - 'schema':package_schema, - 'extras_as_string':True, + context = {'model': model, + 'session': Session, + 'user': 'harvest', + 'schema': package_schema, + 'extras_as_string': True, 'api_version': '2'} if not package: # We need to explicitly provide a package ID, otherwise ckanext-spatial @@ -476,14 +475,14 @@ def _create_package_from_data(self, package_dict, package = None): try: package_dict = action_function(context, package_dict) - except ValidationError,e: + except ValidationError, e: raise Exception('Validation Error: %s' % str(e.error_summary)) if debug_exception_mode: raise return package_dict - def get_gemini_string_and_guid(self,content,url=None): + def get_gemini_string_and_guid(self, content, url=None): '''From a string buffer containing Gemini XML, return the tree under gmd:MD_Metadata and the GUID for it. 
@@ -505,7 +504,8 @@ def get_gemini_string_and_guid(self,content,url=None): gemini_xml = xml.find(metadata_tag) if gemini_xml is None: - self._save_gather_error('Content is not a valid Gemini document without the gmd:MD_Metadata element', self.harvest_job) + self._save_gather_error('Content is not a valid Gemini document without the gmd:MD_Metadata element', + self.harvest_job) gemini_string = etree.tostring(gemini_xml) gemini_document = GeminiDocument(gemini_string) @@ -516,13 +516,14 @@ def get_gemini_string_and_guid(self,content,url=None): return gemini_string, gemini_guid + class GeminiCswHarvester(GeminiHarvester, SingletonPlugin): ''' A Harvester for CSW servers ''' implements(IHarvester) - csw=None + csw = None def info(self): return { @@ -543,7 +544,6 @@ def gather_stage(self, harvest_job): self._save_gather_error('Error contacting the CSW server: %s' % e, harvest_job) return None - log.debug('Starting gathering for %s' % url) used_identifiers = [] ids = [] @@ -556,7 +556,7 @@ def gather_stage(self, harvest_job): continue if identifier is None: log.error('CSW returned identifier %r, skipping...' % identifier) - ## log an error here? happens with the dutch data + # log an error here? happens with the dutch data continue # Create a new HarvestObject for this identifier @@ -566,7 +566,7 @@ def gather_stage(self, harvest_job): ids.append(obj.id) used_identifiers.append(identifier) except Exception, e: - self._save_gather_error('Error for the identifier %s [%r]' % (identifier,e), harvest_job) + self._save_gather_error('Error for the identifier %s [%r]' % (identifier, e), harvest_job) continue except Exception, e: @@ -580,7 +580,7 @@ def gather_stage(self, harvest_job): return ids - def fetch_stage(self,harvest_object): + def fetch_stage(self, harvest_object): log = logging.getLogger(__name__ + '.CSW.fetch') log.debug('GeminiCswHarvester fetch_stage for object: %r', harvest_object) @@ -608,8 +608,8 @@ def fetch_stage(self,harvest_object): # Save the fetch contents in the HarvestObject harvest_object.content = record['xml'] harvest_object.save() - except Exception,e: - self._save_object_error('Error saving the harvest object for GUID %s [%r]' % \ + except Exception, e: + self._save_object_error('Error saving the harvest object for GUID %s [%r]' % (identifier, e), harvest_object) return False @@ -634,7 +634,7 @@ def info(self): 'description': 'A single GEMINI 2.1 document' } - def gather_stage(self,harvest_job): + def gather_stage(self, harvest_job): log = logging.getLogger(__name__ + '.individual.gather') log.debug('GeminiDocHarvester gather_stage for job: %r', harvest_job) @@ -646,13 +646,13 @@ def gather_stage(self,harvest_job): # Get contents try: content = self._get_content(url) - except Exception,e: - self._save_gather_error('Unable to get content for URL: %s: %r' % \ - (url, e),harvest_job) + except Exception, e: + self._save_gather_error('Unable to get content for URL: %s: %r' % + (url, e), harvest_job) return None try: # We need to extract the guid to pass it to the next stage - gemini_string, gemini_guid = self.get_gemini_string_and_guid(content,url) + gemini_string, gemini_guid = self.get_gemini_string_and_guid(content, url) if gemini_guid: # Create a new HarvestObject for this identifier @@ -669,13 +669,13 @@ def gather_stage(self,harvest_job): self._save_gather_error('Could not get the GUID for source %s' % url, harvest_job) return None except Exception, e: - self._save_gather_error('Error parsing the document. 
Is this a valid Gemini document?: %s [%r]'% (url,e),harvest_job) + self._save_gather_error('Error parsing the document. Is this a valid Gemini document?: %s [%r]' + % (url, e), harvest_job) if debug_exception_mode: raise return None - - def fetch_stage(self,harvest_object): + def fetch_stage(self, harvest_object): # The fetching was already done in the previous stage return True @@ -695,7 +695,7 @@ def info(self): 'description': 'A Web Accessible Folder (WAF) displaying a list of GEMINI 2.1 documents' } - def gather_stage(self,harvest_job): + def gather_stage(self, harvest_job): log = logging.getLogger(__name__ + '.WAF.gather') log.debug('GeminiWafHarvester gather_stage for job: %r', harvest_job) @@ -707,23 +707,23 @@ def gather_stage(self,harvest_job): # Get contents try: content = self._get_content(url) - except Exception,e: - self._save_gather_error('Unable to get content for URL: %s: %r' % \ - (url, e),harvest_job) + except Exception, e: + self._save_gather_error('Unable to get content for URL: %s: %r' % + (url, e), harvest_job) return None ids = [] try: - for url in self._extract_urls(content,url): + for url in self._extract_urls(content, url): try: content = self._get_content(url) except Exception, e: msg = 'Couldn\'t harvest WAF link: %s: %s' % (url, e) - self._save_gather_error(msg,harvest_job) + self._save_gather_error(msg, harvest_job) continue else: # We need to extract the guid to pass it to the next stage try: - gemini_string, gemini_guid = self.get_gemini_string_and_guid(content,url) + gemini_string, gemini_guid = self.get_gemini_string_and_guid(content, url) if gemini_guid: log.debug('Got GUID %s' % gemini_guid) # Create a new HarvestObject for this identifier @@ -736,28 +736,25 @@ def gather_stage(self,harvest_job): ids.append(obj.id) - - except Exception,e: - msg = 'Could not get GUID for source %s: %r' % (url,e) - self._save_gather_error(msg,harvest_job) + except Exception, e: + msg = 'Could not get GUID for source %s: %r' % (url, e) + self._save_gather_error(msg, harvest_job) continue - except Exception,e: + except Exception, e: msg = 'Error extracting URLs from %s' % url - self._save_gather_error(msg,harvest_job) + self._save_gather_error(msg, harvest_job) return None if len(ids) > 0: return ids else: - self._save_gather_error('Couldn\'t find any links to metadata files', - harvest_job) + self._save_gather_error('Couldn\'t find any links to metadata files', harvest_job) return None - def fetch_stage(self,harvest_object): + def fetch_stage(self, harvest_object): # The fetching was already done in the previous stage return True - def _extract_urls(self, content, base_url): ''' Get the URLs out of a WAF index page @@ -795,5 +792,3 @@ def _extract_urls(self, content, base_url): base_url += '/' log.debug('WAF base URL: %s', base_url) return [base_url + i for i in urls] - - diff --git a/ckanext/spatial/harvesters/waf.py b/ckanext/spatial/harvesters/waf.py index 488e9603..e6f7ef84 100644 --- a/ckanext/spatial/harvesters/waf.py +++ b/ckanext/spatial/harvesters/waf.py @@ -5,7 +5,6 @@ import pyparsing as parse import requests from sqlalchemy.orm import aliased -from sqlalchemy.exc import DataError from ckan import model @@ -14,7 +13,6 @@ from ckanext.harvest.interfaces import IHarvester from ckanext.harvest.model import HarvestObject from ckanext.harvest.model import HarvestObjectExtra as HOExtra -import ckanext.harvest.queue as queue from ckanext.spatial.harvesters.base import SpatialHarvester, guess_standard @@ -36,17 +34,15 @@ def info(self): 'description': 'A Web Accessible 
Folder (WAF) displaying a list of spatial metadata documents' } - def get_original_url(self, harvest_object_id): url = model.Session.query(HOExtra.value).\ - filter(HOExtra.key=='waf_location').\ - filter(HOExtra.harvest_object_id==harvest_object_id).\ + filter(HOExtra.key == 'waf_location').\ + filter(HOExtra.harvest_object_id == harvest_object_id).\ first() return url[0] if url else None - - def gather_stage(self,harvest_job,collection_package_id=None): + def gather_stage(self, harvest_job, collection_package_id=None): log = logging.getLogger(__name__ + '.WAF.gather') log.debug('WafHarvester gather_stage for job: %r', harvest_job) @@ -62,46 +58,44 @@ def gather_stage(self,harvest_job,collection_package_id=None): response = requests.get(source_url, timeout=60) response.raise_for_status() except requests.exceptions.RequestException, e: - self._save_gather_error('Unable to get content for URL: %s: %r' % \ - (source_url, e),harvest_job) + self._save_gather_error('Unable to get content for URL: %s: %r' % + (source_url, e), harvest_job) return None content = response.content scraper = _get_scraper(response.headers.get('server')) - ###### Get current harvest object out of db ###### - - url_to_modified_db = {} ## mapping of url to last_modified in db - url_to_ids = {} ## mapping of url to guid in db + # Get current harvest object out of db + url_to_modified_db = {} # mapping of url to last_modified in db + url_to_ids = {} # mapping of url to guid in db HOExtraAlias1 = aliased(HOExtra) HOExtraAlias2 = aliased(HOExtra) query = model.Session.query(HarvestObject.guid, HarvestObject.package_id, HOExtraAlias1.value, HOExtraAlias2.value).\ - join(HOExtraAlias1, HarvestObject.extras).\ - join(HOExtraAlias2, HarvestObject.extras).\ - filter(HOExtraAlias1.key=='waf_modified_date').\ - filter(HOExtraAlias2.key=='waf_location').\ - filter(HarvestObject.current==True).\ - filter(HarvestObject.harvest_source_id==harvest_job.source.id) - + join(HOExtraAlias1, HarvestObject.extras).\ + join(HOExtraAlias2, HarvestObject.extras).\ + filter(HOExtraAlias1.key == 'waf_modified_date').\ + filter(HOExtraAlias2.key == 'waf_location'). 
\
+        filter(HarvestObject.harvest_source_id == harvest_job.source.id).\
+        filter(HarvestObject.current == True)  # noqa

         for guid, package_id, modified_date, url in query:
             url_to_modified_db[url] = modified_date
             url_to_ids[url] = (guid, package_id)

-        ###### Get current list of records from source ######
+        # Get current list of records from source

-        url_to_modified_harvest = {} ## mapping of url to last_modified in harvest
+        url_to_modified_harvest = {}  # mapping of url to last_modified in harvest
         try:
-            for url, modified_date in _extract_waf(content,source_url,scraper):
+            for url, modified_date in _extract_waf(content, source_url, scraper):
                 url_to_modified_harvest[url] = modified_date
-        except Exception,e:
+        except Exception, e:
             msg = 'Error extracting URLs from %s, error was %s' % (source_url, e)
-            self._save_gather_error(msg,harvest_job)
+            self._save_gather_error(msg, harvest_job)
             return None

-        ###### Compare source and db ######
+        # Compare source and db

         harvest_locations = set(url_to_modified_harvest.keys())
         old_locations = set(url_to_modified_db.keys())
@@ -112,8 +106,8 @@ def gather_stage(self,harvest_job,collection_package_id=None):
         change = []
         for item in possible_changes:
-            if (not url_to_modified_harvest[item] or not url_to_modified_db[item] #if there is no date assume change
-                or url_to_modified_harvest[item] > url_to_modified_db[item]):
+            if (not url_to_modified_harvest[item] or not url_to_modified_db[item]  # if there is no date assume change
+                    or url_to_modified_harvest[item] > url_to_modified_db[item]):
                 change.append(item)

         def create_extras(url, date, status):
@@ -127,16 +121,15 @@ def create_extras(url, date, status):
             )
             return extras

-
         ids = []
         for location in new:
-            guid=hashlib.md5(location.encode('utf8','ignore')).hexdigest()
+            guid = hashlib.md5(location.encode('utf8', 'ignore')).hexdigest()
             obj = HarvestObject(job=harvest_job,
                                 extras=create_extras(location,
                                                      url_to_modified_harvest[location],
                                                      'new'),
                                 guid=guid
-                               )
+                                )
             obj.save()
             ids.append(obj.id)
@@ -147,19 +140,19 @@ def create_extras(url, date, status):
                                                      'change'),
                                 guid=url_to_ids[location][0],
                                 package_id=url_to_ids[location][1],
-                               )
+                                )
             obj.save()
             ids.append(obj.id)

         for location in delete:
             obj = HarvestObject(job=harvest_job,
-                                extras=create_extras('','', 'delete'),
+                                extras=create_extras('', '', 'delete'),
                                 guid=url_to_ids[location][0],
                                 package_id=url_to_ids[location][1],
-                               )
+                                )
             model.Session.query(HarvestObject).\
-                  filter_by(guid=url_to_ids[location][0]).\
-                  update({'current': False}, False)
+                filter_by(guid=url_to_ids[location][0]).\
+                update({'current': False}, False)
             obj.save()
             ids.append(obj.id)
@@ -169,14 +162,13 @@ def create_extras(url, date, status):
                  len(ids), len(new), len(change), len(delete)))
             return ids
         else:
-            self._save_gather_error('No records to change',
-                                    harvest_job)
+            self._save_gather_error('No records to change', harvest_job)
             return []

     def fetch_stage(self, harvest_object):

         # Check harvest object status
-        status = self._get_object_extra(harvest_object,'status')
+        status = self._get_object_extra(harvest_object, 'status')

         if status == 'delete':
             # No need to fetch anything, just pass to the import stage
@@ -221,27 +213,27 @@ def fetch_stage(self, harvest_object):
         return True


-apache = parse.SkipTo(parse.CaselessLiteral("<a href="), include=True).suppress() \
-         + parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url') \
-         + parse.SkipTo("</a>", include=True).suppress() \
-         + parse.Optional(parse.Literal('</td><td align="right">')).suppress() \
-         + parse.Optional(parse.Combine(
+apache = parse.SkipTo(parse.CaselessLiteral("<a href="), include=True).suppress() \
+    + parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url') \
+    + parse.SkipTo("</a>", include=True).suppress() \
+    + parse.Optional(parse.Literal('</td><td align="right">')).suppress() \
+    + parse.Optional(parse.Combine(
         parse.Word(parse.alphanums+'-') +
-        parse.Word(parse.alphanums+':')
-        ,adjacent=False, joinString=' ').setResultsName('date')
+        parse.Word(parse.alphanums+':'),
+        adjacent=False, joinString=' ').setResultsName('date')
     )

-iis = parse.SkipTo("<br>").suppress() \
-      + parse.OneOrMore("<br>").suppress() \
-      + parse.Optional(parse.Combine(
-          parse.Word(parse.alphanums+'/') +
-          parse.Word(parse.alphanums+':') +
-          parse.Word(parse.alphas)
-          , adjacent=False, joinString=' ').setResultsName('date')
-      ) \
-      + parse.Word(parse.nums).suppress() \
-      + parse.Literal('<A HREF=').suppress() \
-      + parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url')
+iis = parse.SkipTo("<br>").suppress() \
+    + parse.OneOrMore("<br>").suppress() \
+    + parse.Optional(parse.Combine(
+        parse.Word(parse.alphanums+'/') +
+        parse.Word(parse.alphanums+':') +
+        parse.Word(parse.alphas),
+        adjacent=False, joinString=' ').setResultsName('date')
+    ) \
+    + parse.Word(parse.nums).suppress() \
+    + parse.Literal('<A HREF=').suppress() \
+    + parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url')
diff --git a/ckanext/spatial/tests/test_harvest.py b/ckanext/spatial/tests/test_harvest.py
--- a/ckanext/spatial/tests/test_harvest.py
+++ b/ckanext/spatial/tests/test_harvest.py
-BASIC_GEMINI = '''<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:gco="http://www.isotc211.org/2005/gco"><gmd:fileIdentifier><gco:CharacterString>e269743a-cfda-4632-a939-0c8416ae801e</gco:CharacterString></gmd:fileIdentifier><gmd:hierarchyLevel><gmd:MD_ScopeCode codeListValue="service">service</gmd:MD_ScopeCode></gmd:hierarchyLevel></gmd:MD_Metadata>'''
+BASIC_GEMINI = '''<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:gco="http://www.isotc211.org/2005/gco"><gmd:fileIdentifier><gco:CharacterString>e269743a-cfda-4632-a939-0c8416ae801e</gco:CharacterString></gmd:fileIdentifier><gmd:hierarchyLevel><gmd:MD_ScopeCode codeListValue="service">service</gmd:MD_ScopeCode></gmd:hierarchyLevel></gmd:MD_Metadata>'''  # noqa
 GUID = 'e269743a-cfda-4632-a939-0c8416ae801e'

-GEMINI_MISSING_GUID = '''<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:gco="http://www.isotc211.org/2005/gco"/>'''
+GEMINI_MISSING_GUID = '''<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:gco="http://www.isotc211.org/2005/gco"/>'''  # noqa
+

 class TestGatherMethods(HarvestFixtureBase):

     def setup(self):
@@ -944,13 +947,16 @@ def test_get_gemini_string_and_guid__no_guid(self):
         assert_equal(res, (GEMINI_MISSING_GUID, ''))

     def test_get_gemini_string_and_guid__non_parsing(self):
-        content = '<gmd:MD_Metadata>'  # no closing tag
+        # no closing tag
+        content = '<gmd:MD_Metadata>'
+
         assert_raises(lxml.etree.XMLSyntaxError, self.harvester.get_gemini_string_and_guid, content)

     def test_get_gemini_string_and_guid__empty(self):
         content = ''
         assert_raises(lxml.etree.XMLSyntaxError, self.harvester.get_gemini_string_and_guid, content)

+
 class TestImportStageTools:

     def test_licence_url_normal(self):
         assert_equal(GeminiHarvester._extract_first_licence_url(
@@ -1023,7 +1029,7 @@ def test_responsible_organisation_multiple_roles(self):
         assert_equal(GeminiHarvester._process_responsible_organisation(responsible_organisation),
                      ('Ordnance Survey', ['Distributor (distributor)',
                                           'Ordnance Survey (publisher, custodian)',
-                                          ]))
+                      ]))

     def test_responsible_organisation_blank_provider(self):
         # no owner or publisher, so blank provider
@@ -1067,8 +1073,7 @@ def get_validation_errors(self, validation_test_filename):
         harvester = GeminiDocHarvester()

         # Gather stage for GeminiDocHarvester includes validation
-        object_ids = harvester.gather_stage(job)
-
+        harvester.gather_stage(job)

         # Check the validation errors
         errors = '; '.join([gather_error.message for gather_error in job.gather_errors])
@@ -1125,7 +1130,9 @@ def test_10_service_fail_constraints_schematron(self):

     def test_11_service_fail_gemini_schematron(self):
         errors = self.get_validation_errors('11_Service_Invalid_GEMINI_Service_Type.xml')
         assert len(errors) > 0
-        assert_in("Service type shall be one of 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other' following INSPIRE generic names.", errors)
+        assert_in("Service type shall be one of"
+                  " 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other' "
+                  "following INSPIRE generic names.", errors)

     def test_12_service_valid(self):
         errors = self.get_validation_errors('12_Service_Valid.xml')
@@ -1135,4 +1142,5 @@ def test_13_dataset_fail_iso19139_schema_2(self):
         # This test Dataset has srv tags and only Service metadata should.
errors = self.get_validation_errors('13_Dataset_Invalid_Element_srv.xml') assert len(errors) > 0 - assert_in('Element \'{http://www.isotc211.org/2005/srv}SV_ServiceIdentification\': This element is not expected.', errors) + assert_in('Element \'{http://www.isotc211.org/2005/srv}SV_ServiceIdentification\': ' + 'This element is not expected.', errors) diff --git a/ckanext/spatial/tests/test_validation.py b/ckanext/spatial/tests/test_validation.py index 860c2387..7f95bd94 100644 --- a/ckanext/spatial/tests/test_validation.py +++ b/ckanext/spatial/tests/test_validation.py @@ -7,10 +7,11 @@ # other validation tests are in test_harvest.py + class TestValidation: def _get_file_path(self, file_name): - return os.path.join(os.path.dirname(__file__), 'xml', file_name) + return os.path.join(os.path.dirname(__file__), 'xml', file_name) def get_validation_errors(self, validator, validation_test_filename): validation_test_filepath = self._get_file_path(validation_test_filename) @@ -45,13 +46,13 @@ def test_01_dataset_fail_iso19139_schema(self): def test_02_dataset_fail_constraints_schematron(self): errors = self.get_validation_errors(validation.ConstraintsSchematron14, - 'gemini2.1/validation/02_Dataset_Invalid_19139_Missing_Data_Format.xml') + 'gemini2.1/validation/02_Dataset_Invalid_19139_Missing_Data_Format.xml') assert len(errors) > 0 assert_in('MD_Distribution / MD_Format: count(distributionFormat + distributorFormat) > 0', errors) def test_03_dataset_fail_gemini_schematron(self): errors = self.get_validation_errors(validation.Gemini2Schematron, - 'gemini2.1/validation/03_Dataset_Invalid_GEMINI_Missing_Keyword.xml') + 'gemini2.1/validation/03_Dataset_Invalid_GEMINI_Missing_Keyword.xml') assert len(errors) > 0 assert_in('Descriptive keywords are mandatory', errors) @@ -71,20 +72,20 @@ def test_04_dataset_valid(self): def test_05_series_fail_iso19139_schema(self): errors = self.get_validation_errors(validation.ISO19139EdenSchema, - 'gemini2.1/validation/05_Series_Invalid_XSD_No_Such_Element.xml') + 'gemini2.1/validation/05_Series_Invalid_XSD_No_Such_Element.xml') assert len(errors) > 0 assert_in('(gmx.xsd)', errors) assert_in('\'{http://www.isotc211.org/2005/gmd}nosuchelement\': This element is not expected.', errors) def test_06_series_fail_constraints_schematron(self): errors = self.get_validation_errors(validation.ConstraintsSchematron14, - 'gemini2.1/validation/06_Series_Invalid_19139_Missing_Data_Format.xml') + 'gemini2.1/validation/06_Series_Invalid_19139_Missing_Data_Format.xml') assert len(errors) > 0 assert_in('MD_Distribution / MD_Format: count(distributionFormat + distributorFormat) > 0', errors) def test_07_series_fail_gemini_schematron(self): errors = self.get_validation_errors(validation.Gemini2Schematron, - 'gemini2.1/validation/07_Series_Invalid_GEMINI_Missing_Keyword.xml') + 'gemini2.1/validation/07_Series_Invalid_GEMINI_Missing_Keyword.xml') assert len(errors) > 0 assert_in('Descriptive keywords are mandatory', errors) @@ -93,22 +94,23 @@ def test_08_series_valid(self): def test_09_service_fail_iso19139_schema(self): errors = self.get_validation_errors(validation.ISO19139EdenSchema, - 'gemini2.1/validation/09_Service_Invalid_No_Such_Element.xml') + 'gemini2.1/validation/09_Service_Invalid_No_Such_Element.xml') assert len(errors) > 0 assert_in('(gmx.xsd & srv.xsd)', errors) assert_in('\'{http://www.isotc211.org/2005/gmd}nosuchelement\': This element is not expected.', errors) def test_10_service_fail_constraints_schematron(self): errors = 
self.get_validation_errors(validation.ConstraintsSchematron14, - 'gemini2.1/validation/10_Service_Invalid_19139_Level_Description.xml') + 'gemini2.1/validation/10_Service_Invalid_19139_Level_Description.xml') assert len(errors) > 0 assert_in("DQ_Scope: 'levelDescription' is mandatory if 'level' notEqual 'dataset' or 'series'.", errors) def test_11_service_fail_gemini_schematron(self): errors = self.get_validation_errors(validation.Gemini2Schematron, - 'gemini2.1/validation/11_Service_Invalid_GEMINI_Service_Type.xml') + 'gemini2.1/validation/11_Service_Invalid_GEMINI_Service_Type.xml') assert len(errors) > 0 - assert_in("Service type shall be one of 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other' following INSPIRE generic names.", errors) + assert_in("Service type shall be one of 'discovery', 'view', 'download', 'transformation', " + "'invoke' or 'other' following INSPIRE generic names.", errors) def test_12_service_valid(self): self.assert_passes_all_gemini2_1_validation('gemini2.1/validation/12_Service_Valid.xml') @@ -116,17 +118,24 @@ def test_12_service_valid(self): def test_13_dataset_fail_iso19139_schema_2(self): # This test Dataset has srv tags and only Service metadata should. errors = self.get_validation_errors(validation.ISO19139EdenSchema, - 'gemini2.1/validation/13_Dataset_Invalid_Element_srv.xml') + 'gemini2.1/validation/13_Dataset_Invalid_Element_srv.xml') assert len(errors) > 0 assert_in('(gmx.xsd)', errors) - assert_in('Element \'{http://www.isotc211.org/2005/srv}SV_ServiceIdentification\': This element is not expected.', errors) + assert_in('Element \'{http://www.isotc211.org/2005/srv}SV_ServiceIdentification\': ' + 'This element is not expected.', errors) def test_schematron_error_extraction(self): validation_error_xml = ''' - + - Service type shall be one of 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other' following INSPIRE generic names. + Service type shall be one of 'discovery', 'view', 'download', 'transformation', + 'invoke' or 'other' following INSPIRE generic names. 
@@ -140,7 +149,6 @@ def test_schematron_error_extraction(self): assert_in("/*[local-name()='MD_Metadata'", details) assert_in("Service type shall be one of 'discovery'", details) - def test_error_line_numbers(self): file_path = self._get_file_path('iso19139/dataset-invalid.xml') xml = etree.parse(file_path) diff --git a/ckanext/spatial/tests/xml_file_server.py b/ckanext/spatial/tests/xml_file_server.py index 31e62f07..9b7e0afb 100644 --- a/ckanext/spatial/tests/xml_file_server.py +++ b/ckanext/spatial/tests/xml_file_server.py @@ -7,20 +7,21 @@ PORT = 8999 + def serve(port=PORT): '''Serves test XML files over HTTP''' - + # Make sure we serve from the tests' XML directory os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'xml')) Handler = SimpleHTTPServer.SimpleHTTPRequestHandler - + class TestServer(SocketServer.TCPServer): allow_reuse_address = True - + httpd = TestServer(("", PORT), Handler) - + print 'Serving test HTTP server at port', PORT httpd_thread = Thread(target=httpd.serve_forever) diff --git a/ckanext/spatial/validation/__init__.py b/ckanext/spatial/validation/__init__.py index 8643dccc..56b6477e 100644 --- a/ckanext/spatial/validation/__init__.py +++ b/ckanext/spatial/validation/__init__.py @@ -6,4 +6,4 @@ import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) -from validation import * +from validation import * # noqa diff --git a/ckanext/spatial/validation/validation.py b/ckanext/spatial/validation/validation.py index 28e8506a..461d183b 100644 --- a/ckanext/spatial/validation/validation.py +++ b/ckanext/spatial/validation/validation.py @@ -223,7 +223,7 @@ def is_valid(cls, xml): error_details = [] for error in errors: message, details = cls.extract_error_details(error) - if not message in messages_already_reported: + if message not in messages_already_reported: # TODO: perhaps can extract the source line from the # error location error_details.append((details, None)) @@ -316,6 +316,7 @@ def get_schematrons(cls): "xml/gemini2/Gemini2_R1r3.sch") as schema: return [cls.schematron(schema)] + all_validators = (ISO19139Schema, ISO19139EdenSchema, ISO19139NGDCSchema, @@ -338,7 +339,7 @@ def __init__(self, profiles=["iso19139", "constraints", "gemini2"]): self.validators[validator_class.name] = validator_class def add_validator(self, validator_class): - self.validators[validator_class.name] = validator_class + self.validators[validator_class.name] = validator_class def isvalid(self, xml): '''For backward compatibility''' @@ -358,13 +359,12 @@ def is_valid(self, xml): (is_valid, failed_profile_name, [(error_message_string, error_line_number)]) ''' - log.debug('Starting validation against profile(s) %s' % ','.join(self.profiles)) for name in self.profiles: validator = self.validators[name] is_valid, error_message_list = validator.is_valid(xml) if not is_valid: - #error_message_list.insert(0, 'Validating against "%s" profile failed' % validator.title) + # error_message_list.insert(0, 'Validating against "%s" profile failed' % validator.title) log.info('Validating against "%s" profile failed' % validator.title) log.debug('%r', error_message_list) return False, validator.name, error_message_list @@ -372,6 +372,7 @@ def is_valid(self, xml): log.info('Validation passed') return True, None, [] + if __name__ == '__main__': from sys import argv import logging diff --git a/doc/conf.py b/doc/conf.py index ab4097ca..d3c538bb 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -11,17 +11,17 @@ # All configuration values have a default; values that are commented out # serve to 
show the default. -import sys, os +import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. @@ -34,7 +34,7 @@ source_suffix = '.rst' # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' @@ -54,40 +54,40 @@ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False exclude_trees = ['.build'] @@ -102,15 +102,15 @@ html_theme = 'sphinx_rtd_theme' html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] -#html_theme = 'default' +# html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] html_sidebars = { '**': ['globaltoc.html'] } @@ -118,19 +118,19 @@ # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. 
This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -139,44 +139,44 @@ # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'ckanext-spatialdoc' @@ -185,14 +185,14 @@ # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', + # The paper size ('letterpaper' or 'a4paper'). + # 'papersize': 'letterpaper', -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', + # The font size ('10pt', '11pt' or '12pt'). + # 'pointsize': '10pt', -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # Additional stuff for the LaTeX preamble. + # 'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples @@ -204,23 +204,23 @@ # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. 
-#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output -------------------------------------------- @@ -233,14 +233,14 @@ ] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, -# dir menu entry, description, category) +# dir menu entry, description, category) texinfo_documents = [ ('index', 'ckanext-spatial', u'ckanext-spatial Documentation', u'Open Knowledge Foundation', 'ckanext-spatial', 'One line description of project.', @@ -248,16 +248,16 @@ ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False # Example configuration for intersphinx: refer to the Python standard library. diff --git a/setup.py b/setup.py index 294d1445..45cc20ef 100644 --- a/setup.py +++ b/setup.py @@ -1,29 +1,27 @@ from setuptools import setup, find_packages -import sys, os version = '0.2' setup( - name='ckanext-spatial', - version=version, - description="Geo-related plugins for CKAN", - long_description="""\ - """, - classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers - keywords='', - author='Open Knowledge Foundation', - author_email='info@okfn.org', - url='http://okfn.org', - license='AGPL', - packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), - namespace_packages=['ckanext'], - include_package_data=True, - zip_safe=False, - install_requires=[ - # -*- Extra requirements: -*- - ], - entry_points=\ - """ + name='ckanext-spatial', + version=version, + description="Geo-related plugins for CKAN", + long_description="""\ + """, + classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers + keywords='', + author='Open Knowledge Foundation', + author_email='info@okfn.org', + url='http://okfn.org', + license='AGPL', + packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), + namespace_packages=['ckanext'], + include_package_data=True, + zip_safe=False, + install_requires=[ + # -*- Extra requirements: -*- + ], + entry_points=""" [ckan.plugins] spatial_metadata=ckanext.spatial.plugin:SpatialMetadata spatial_query=ckanext.spatial.plugin:SpatialQuery @@ -46,5 +44,5 @@ [ckan.test_plugins] test_spatial_plugin = ckanext.spatial.tests.test_plugin.plugin:TestSpatialPlugin - """, + """, )
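
For reference, the gather-stage hunks in the WAF harvester above all revolve around one
piece of bookkeeping: comparing the URLs (and last-modified dates) found on the web
accessible folder against what is already recorded in the harvest DB, then classifying
each location as new, changed or deleted. A minimal standalone sketch of that logic,
assuming plain dicts keyed by URL as in the diff (the function name and sample data are
illustrative, not part of the extension):

    def classify_locations(url_to_modified_harvest, url_to_modified_db):
        harvest_locations = set(url_to_modified_harvest.keys())
        old_locations = set(url_to_modified_db.keys())

        new = harvest_locations - old_locations
        delete = old_locations - harvest_locations

        change = []
        for item in harvest_locations & old_locations:
            # if either side has no date, assume the record changed
            if (not url_to_modified_harvest[item] or not url_to_modified_db[item]
                    or url_to_modified_harvest[item] > url_to_modified_db[item]):
                change.append(item)
        return new, change, delete

    new, change, delete = classify_locations(
        {'a.xml': '2018-01-02', 'b.xml': '2018-01-01', 'c.xml': None},
        {'b.xml': '2017-12-31', 'c.xml': '2017-12-31', 'd.xml': '2017-12-31'})
    # new == set(['a.xml']), sorted(change) == ['b.xml', 'c.xml'],
    # delete == set(['d.xml'])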
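
The apache/iis pyparsing grammars reindented above exist to scrape (url, date) pairs out
of server-generated directory listings. A self-contained sketch of how such a grammar is
applied; the simplified grammar and sample HTML below are stand-ins for illustration,
not the extension's _extract_waf():

    import pyparsing as parse

    # skip to each anchor tag and capture the quoted href value
    anchor = parse.SkipTo(parse.CaselessLiteral('<a href='), include=True).suppress() \
        + parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url')

    listing = ('<tr><td><a href="one.xml">one.xml</a></td></tr>'
               '<tr><td><A HREF="two.xml">two.xml</A></td></tr>')

    urls = [tokens.url for tokens, start, end in anchor.scanString(listing)]
    print urls  # ['one.xml', 'two.xml']

CaselessLiteral makes the same expression match both the lowercase Apache-style and the
uppercase IIS-style anchors, which is why the scrapers only differ in the boilerplate
they skip around the link.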
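
Similarly, the reflowed is_valid() in validation.py keeps its documented contract: it
returns (is_valid, failed_profile_name, [(error_message_string, error_line_number)]).
A hedged usage sketch, assuming the import path exposed by validation/__init__.py and
using a deliberately invalid one-element XML record as a stand-in:

    from lxml import etree
    from ckanext.spatial.validation import Validators

    xml = etree.fromstring(
        '<gmd:MD_Metadata xmlns:gmd="http://www.isotc211.org/2005/gmd"/>')

    validators = Validators(profiles=['iso19139', 'constraints', 'gemini2'])
    is_valid, failed_profile, errors = validators.is_valid(xml)
    if not is_valid:
        for message, line in errors:
            print '%s (line %s)' % (message, line)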