From c52e8c9ccdfb0437d9560c6e29d1859ad014578b Mon Sep 17 00:00:00 2001 From: Teemu Leivo Date: Mon, 25 Feb 2019 19:06:05 +0200 Subject: [PATCH 01/12] First Flake8 style changes --- ckanext/spatial/tests/lib/test_spatial.py | 22 ++-- .../tests/model/test_harvested_metadata.py | 3 + ckanext/spatial/tests/test_harvest.py | 121 +++++++++--------- ckanext/spatial/tests/test_validation.py | 25 ++-- ckanext/spatial/tests/xml_file_server.py | 9 +- ckanext/spatial/validation/__init__.py | 2 - ckanext/spatial/validation/validation.py | 7 +- doc/conf.py | 100 +++++++-------- 8 files changed, 147 insertions(+), 142 deletions(-) diff --git a/ckanext/spatial/tests/lib/test_spatial.py b/ckanext/spatial/tests/lib/test_spatial.py index 2e46aef0..5787babc 100644 --- a/ckanext/spatial/tests/lib/test_spatial.py +++ b/ckanext/spatial/tests/lib/test_spatial.py @@ -41,7 +41,6 @@ def test_different_points(self): assert not compare_geometry_fields(extent1.the_geom, extent2.the_geom) - class TestValidateBbox: bbox_dict = {'minx': -4.96, 'miny': 55.70, @@ -64,8 +63,14 @@ def test_bad_2(self): res = validate_bbox('random') assert_equal(res, None) + def bbox_2_geojson(bbox_dict): - return '{"type":"Polygon","coordinates":[[[%(minx)s, %(miny)s],[%(minx)s, %(maxy)s], [%(maxx)s, %(maxy)s], [%(maxx)s, %(miny)s], [%(minx)s, %(miny)s]]]}' % bbox_dict + return """{"type":"Polygon","coordinates":[[[%(minx)s, %(miny)s], + [%(minx)s, %(maxy)s], + [%(maxx)s, %(maxy)s], + [%(maxx)s, %(miny)s], + [%(minx)s, %(miny)s]]]}""" % bbox_dict + class SpatialQueryTestBase(SpatialTestBase): '''Base class for tests of spatial queries''' @@ -92,7 +97,7 @@ def create_package(cls, **package_dict): 'extras_as_string': True, 'api_version': 2, 'ignore_auth': True, - } + } package_dict = package_create(context, package_dict) return context.get('id') @@ -101,6 +106,7 @@ def x_values_to_bbox(cls, x_tuple): return {'minx': x_tuple[0], 'maxx': x_tuple[1], 'miny': cls.miny, 'maxy': cls.maxy} + class TestBboxQuery(SpatialQueryTestBase): # x values for the fixtures fixtures_x = [(0, 1), (0, 3), (0, 4), (4, 5), (6, 7)] @@ -112,6 +118,7 @@ def test_query(self): assert_equal(set(package_titles), set(('(0, 3)', '(0, 4)', '(4, 5)'))) + class TestBboxQueryOrdered(SpatialQueryTestBase): # x values for the fixtures fixtures_x = [(0, 9), (1, 8), (2, 7), (3, 6), (4, 5), @@ -132,18 +139,15 @@ def test_query(self): class TestBboxQueryPerformance(SpatialQueryTestBase): # x values for the fixtures - fixtures_x = [(random.uniform(0, 3), random.uniform(3,9)) \ - for x in xrange(10)] # increase the number to 1000 say + fixtures_x = [(random.uniform(0, 3), random.uniform(3, 9)) + for x in xrange(10)] # increase the number to 1000 say + def test_query(self): - bbox_dict = self.x_values_to_bbox((2, 7)) t0 = time.time() - q = bbox_query(bbox_dict) t1 = time.time() print 'bbox_query took: ', t1-t0 def test_query_ordered(self): - bbox_dict = self.x_values_to_bbox((2, 7)) t0 = time.time() - q = bbox_query_ordered(bbox_dict) t1 = time.time() print 'bbox_query_ordered took: ', t1-t0 diff --git a/ckanext/spatial/tests/model/test_harvested_metadata.py b/ckanext/spatial/tests/model/test_harvested_metadata.py index 2c91dc08..05d8d6bc 100644 --- a/ckanext/spatial/tests/model/test_harvested_metadata.py +++ b/ckanext/spatial/tests/model/test_harvested_metadata.py @@ -4,6 +4,7 @@ from ckanext.spatial.model import ISODocument + def open_xml_fixture(xml_filename): xml_filepath = os.path.join(os.path.dirname(__file__), 'xml', @@ -18,6 +19,7 @@ def open_xml_fixture(xml_filename): 
(metadata_filepath, e) return xml_string + def test_simple(): xml_string = open_xml_fixture('gemini_dataset.xml') iso_document = ISODocument(xml_string) @@ -25,6 +27,7 @@ def test_simple(): assert_equal(iso_values['guid'], 'test-dataset-1') assert_equal(iso_values['metadata-date'], '2011-09-23T10:06:08') + def test_multiplicity_warning(): # This dataset lacks a value for Metadata Date and should # produce a log.warning, but not raise an exception. diff --git a/ckanext/spatial/tests/test_harvest.py b/ckanext/spatial/tests/test_harvest.py index d7020d97..383b7e9b 100644 --- a/ckanext/spatial/tests/test_harvest.py +++ b/ckanext/spatial/tests/test_harvest.py @@ -460,11 +460,11 @@ def test_harvest_error_validation(self): message = obj.errors[0].message assert_in('One email address shall be provided', message) - assert_in('Service type shall be one of \'discovery\', \'view\', \'download\', \'transformation\', \'invoke\' or \'other\' following INSPIRE generic names', message) + assert_in("Service type shall be one of 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other'" + "following INSPIRE generic names", message) assert_in('Limitations on public access code list value shall be \'otherRestrictions\'', message) assert_in('One organisation name shall be provided', message) - def test_harvest_update_records(self): # Create source @@ -479,7 +479,7 @@ def test_harvest_update_records(self): first_obj = self._run_job_for_single_document(first_job) - first_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + first_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Package was created assert first_package_dict @@ -498,7 +498,7 @@ def test_harvest_update_records(self): Session.refresh(first_obj) Session.refresh(second_obj) - second_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + second_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Package was not updated assert second_package_dict, first_package_dict['id'] == second_package_dict['id'] @@ -507,7 +507,7 @@ def test_harvest_update_records(self): # Create and run a third job, forcing the importing to simulate an update in the package third_job = self._create_job(source.id) - third_obj = self._run_job_for_single_document(third_job,force_import=True) + third_obj = self._run_job_for_single_document(third_job, force_import=True) # For some reason first_obj does not get updated after the import_stage, # and we have to force a refresh to get the actual DB values. 
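The Session.remove()/add()/refresh() sequence that recurs below is how these tests re-read database state after an action has committed in another session; without it, the in-memory harvest objects keep stale attribute values. Isolated as a helper it would look something like this (a sketch only; the helper name is invented, and Session is assumed to be CKAN's scoped SQLAlchemy session)::

    from ckan.model import Session

    def refresh_from_db(*objects):
        # Discard the session's cached identity map, then re-attach
        # each detached object and re-read its row from the database.
        Session.remove()
        for obj in objects:
            Session.add(obj)
            Session.refresh(obj)
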
@@ -520,7 +520,7 @@ def test_harvest_update_records(self): Session.refresh(second_obj) Session.refresh(third_obj) - third_package_dict = get_action('package_show')(self.context,{'id':third_obj.package_id}) + third_package_dict = get_action('package_show')(self.context, {'id': third_obj.package_id}) # Package was updated assert third_package_dict, first_package_dict['id'] == third_package_dict['id'] @@ -543,7 +543,7 @@ def test_harvest_deleted_record(self): first_obj = self._run_job_for_single_document(first_job) - first_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + first_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Package was created assert first_package_dict @@ -552,8 +552,8 @@ def test_harvest_deleted_record(self): # Delete package first_package_dict['state'] = u'deleted' - self.context.update({'id':first_package_dict['id']}) - updated_package_dict = get_action('package_update')(self.context,first_package_dict) + self.context.update({'id': first_package_dict['id']}) + updated_package_dict = get_action('package_update')(self.context, first_package_dict) # Create and run a second job, the date has not changed, so the package should not be updated # and remain deleted @@ -563,14 +563,13 @@ def test_harvest_deleted_record(self): second_obj = self._run_job_for_single_document(second_job) - second_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + second_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Package was not updated assert second_package_dict, updated_package_dict['id'] == second_package_dict['id'] assert not second_obj.package, not second_obj.package_id assert second_obj.current == False, first_obj.current == True - # Harvest an updated document, with a more recent modified date, package should be # updated and reactivated source.url = u'http://127.0.0.1:8999/gemini2.1/service1_newer.xml' @@ -580,7 +579,7 @@ def test_harvest_deleted_record(self): third_obj = self._run_job_for_single_document(third_job) - third_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + third_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) Session.remove() Session.add(first_obj) @@ -600,8 +599,6 @@ def test_harvest_deleted_record(self): assert 'NEWER' in third_package_dict['title'] assert third_package_dict['state'] == u'active' - - def test_harvest_different_sources_same_document(self): # Create source1 @@ -616,7 +613,7 @@ def test_harvest_different_sources_same_document(self): first_obj = self._run_job_for_single_document(first_job) - first_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + first_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Package was created assert first_package_dict @@ -638,7 +635,7 @@ def test_harvest_different_sources_same_document(self): second_obj = self._run_job_for_single_document(second_job) - second_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + second_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Package was not updated assert second_package_dict, first_package_dict['id'] == second_package_dict['id'] @@ -647,7 +644,7 @@ def test_harvest_different_sources_same_document(self): # Inactivate source1 and reharvest from source2, package should be updated third_job = self._create_job(source2.id) - 
third_obj = self._run_job_for_single_document(third_job,force_import=True) + third_obj = self._run_job_for_single_document(third_job, force_import=True) Session.remove() Session.add(first_obj) @@ -658,7 +655,7 @@ def test_harvest_different_sources_same_document(self): Session.refresh(second_obj) Session.refresh(third_obj) - third_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + third_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Package was updated assert third_package_dict, first_package_dict['id'] == third_package_dict['id'] @@ -667,7 +664,6 @@ def test_harvest_different_sources_same_document(self): assert second_obj.current == False assert first_obj.current == False - def test_harvest_different_sources_same_document_but_deleted_inbetween(self): # Create source1 @@ -682,7 +678,7 @@ def test_harvest_different_sources_same_document_but_deleted_inbetween(self): first_obj = self._run_job_for_single_document(first_job) - first_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + first_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Package was created assert first_package_dict @@ -690,8 +686,8 @@ def test_harvest_different_sources_same_document_but_deleted_inbetween(self): assert first_obj.current == True # Delete/withdraw the package - first_package_dict = get_action('package_delete')(self.context,{'id':first_obj.package_id}) - first_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + first_package_dict = get_action('package_delete')(self.context, {'id': first_obj.package_id}) + first_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Harvest the same document, unchanged, from another source source2_fixture = { @@ -705,7 +701,7 @@ def test_harvest_different_sources_same_document_but_deleted_inbetween(self): second_obj = self._run_job_for_single_document(second_job) - second_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + second_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # It would be good if the package was updated, but we see that it isn't assert second_package_dict, first_package_dict['id'] == second_package_dict['id'] @@ -713,7 +709,6 @@ def test_harvest_different_sources_same_document_but_deleted_inbetween(self): assert second_obj.current == False assert first_obj.current == True - def test_harvest_moves_sources(self): # Create source1 @@ -728,7 +723,7 @@ def test_harvest_moves_sources(self): first_obj = self._run_job_for_single_document(first_job) - first_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + first_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Package was created assert first_package_dict @@ -747,7 +742,7 @@ def test_harvest_moves_sources(self): second_obj = self._run_job_for_single_document(second_job) - second_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + second_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Now we have two packages assert second_package_dict, first_package_dict['id'] == second_package_dict['id'] @@ -758,7 +753,6 @@ def test_harvest_moves_sources(self): # to update the date to get it to reharvest, and then you should # withdraw the package relating to the original harvest source. 
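Which source "owns" a document in these tests comes down to which HarvestObject row is flagged current for the GUID, the same flag the gather-stage queries elsewhere in this series filter on. A sketch of that ownership lookup (the function name is illustrative, not part of the codebase)::

    from ckan import model
    from ckanext.harvest.model import HarvestObject

    def current_owner(guid):
        # The object marked current for a GUID ties the GUID to one
        # harvest source and one package; None means it is unclaimed.
        obj = model.Session.query(HarvestObject) \
            .filter(HarvestObject.guid == guid) \
            .filter(HarvestObject.current == True) \
            .first()
        return (obj.harvest_source_id, obj.package_id) if obj else None
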
- def test_harvest_import_command(self): # Create source @@ -773,7 +767,7 @@ def test_harvest_import_command(self): first_obj = self._run_job_for_single_document(first_job) - before_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + before_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Package was created assert before_package_dict @@ -787,7 +781,7 @@ def test_harvest_import_command(self): third_obj = self._run_job_for_single_document(third_job) # Run the import command manually - imported_objects = get_action('harvest_objects_import')(self.context,{'source_id':source.id}) + get_action('harvest_objects_import')(self.context, {'source_id': source.id}) Session.remove() Session.add(first_obj) Session.add(second_obj) @@ -797,7 +791,7 @@ def test_harvest_import_command(self): Session.refresh(second_obj) Session.refresh(third_obj) - after_package_dict = get_action('package_show')(self.context,{'id':first_obj.package_id}) + after_package_dict = get_action('package_show')(self.context, {'id': first_obj.package_id}) # Package was updated, and the current object remains the same assert after_package_dict, before_package_dict['id'] == after_package_dict['id'] @@ -805,12 +799,11 @@ def test_harvest_import_command(self): assert second_obj.current == False assert first_obj.current == True - - source_dict = get_action('harvest_source_show')(self.context,{'id':source.id}) + source_dict = get_action('harvest_source_show')(self.context, {'id': source.id}) assert source_dict['status']['total_datasets'] == 1 def test_clean_tags(self): - + # Create source source_fixture = { 'title': 'Test Source', @@ -834,36 +827,36 @@ def test_clean_tags(self): user_name = user.name org = Group.by_name('test-org') if org is None: - org = call_action('organization_create', - context={'user': user_name}, - name='test-org') + org = call_action('organization_create', + context={'user': user_name}, + name='test-org') existing_g = Group.by_name('existing-group') if existing_g is None: - existing_g = call_action('group_create', - context={'user': user_name}, - name='existing-group') + existing_g = call_action('group_create', + context={'user': user_name}, + name='existing-group') - context = {'user': 'dummy'} + context = {'user': 'dummy'} package_schema = default_update_package_schema() context['schema'] = package_schema package_dict = {'frequency': 'manual', - 'publisher_name': 'dummy', - 'extras': [{'key':'theme', 'value':['non-mappable', 'thememap1']}], - 'groups': [], - 'title': 'fakename', - 'holder_name': 'dummy', - 'holder_identifier': 'dummy', - 'name': 'fakename', - 'notes': 'dummy', - 'owner_org': 'test-org', - 'modified': datetime.now(), - 'publisher_identifier': 'dummy', - 'metadata_created' : datetime.now(), - 'metadata_modified' : datetime.now(), - 'guid': unicode(uuid4()), - 'identifier': 'dummy'} - - package_data = call_action('package_create', context=context, **package_dict) + 'publisher_name': 'dummy', + 'extras': [{'key': 'theme', 'value': ['non-mappable', 'thememap1']}], + 'groups': [], + 'title': 'fakename', + 'holder_name': 'dummy', + 'holder_identifier': 'dummy', + 'name': 'fakename', + 'notes': 'dummy', + 'owner_org': 'test-org', + 'modified': datetime.now(), + 'publisher_identifier': 'dummy', + 'metadata_created': datetime.now(), + 'metadata_modified': datetime.now(), + 'guid': unicode(uuid4()), + 'identifier': 'dummy'} + + call_action('package_create', context=context, **package_dict) package = Package.get('fakename') source, job = 
self._create_source_and_job(source_fixture) @@ -918,6 +911,7 @@ def test_clean_tags(self): GUID = 'e269743a-cfda-4632-a939-0c8416ae801e' GEMINI_MISSING_GUID = '''''' + class TestGatherMethods(HarvestFixtureBase): def setup(self): HarvestFixtureBase.setup(self) @@ -944,13 +938,16 @@ def test_get_gemini_string_and_guid__no_guid(self): assert_equal(res, (GEMINI_MISSING_GUID, '')) def test_get_gemini_string_and_guid__non_parsing(self): - content = '' # no closing tag + # no closing tag + content = '' + assert_raises(lxml.etree.XMLSyntaxError, self.harvester.get_gemini_string_and_guid, content) def test_get_gemini_string_and_guid__empty(self): content = '' assert_raises(lxml.etree.XMLSyntaxError, self.harvester.get_gemini_string_and_guid, content) + class TestImportStageTools: def test_licence_url_normal(self): assert_equal(GeminiHarvester._extract_first_licence_url( @@ -1023,7 +1020,7 @@ def test_responsible_organisation_multiple_roles(self): assert_equal(GeminiHarvester._process_responsible_organisation(responsible_organisation), ('Ordnance Survey', ['Distributor (distributor)', 'Ordnance Survey (publisher, custodian)', - ])) + ])) def test_responsible_organisation_blank_provider(self): # no owner or publisher, so blank provider @@ -1067,8 +1064,7 @@ def get_validation_errors(self, validation_test_filename): harvester = GeminiDocHarvester() # Gather stage for GeminiDocHarvester includes validation - object_ids = harvester.gather_stage(job) - + harvester.gather_stage(job) # Check the validation errors errors = '; '.join([gather_error.message for gather_error in job.gather_errors]) @@ -1125,7 +1121,8 @@ def test_10_service_fail_constraints_schematron(self): def test_11_service_fail_gemini_schematron(self): errors = self.get_validation_errors('11_Service_Invalid_GEMINI_Service_Type.xml') assert len(errors) > 0 - assert_in("Service type shall be one of 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other' following INSPIRE generic names.", errors) + assert_in("Service type shall be one of" + " 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other' following INSPIRE generic names.", errors) def test_12_service_valid(self): errors = self.get_validation_errors('12_Service_Valid.xml') diff --git a/ckanext/spatial/tests/test_validation.py b/ckanext/spatial/tests/test_validation.py index 860c2387..faadbd36 100644 --- a/ckanext/spatial/tests/test_validation.py +++ b/ckanext/spatial/tests/test_validation.py @@ -7,10 +7,11 @@ # other validation tests are in test_harvest.py + class TestValidation: def _get_file_path(self, file_name): - return os.path.join(os.path.dirname(__file__), 'xml', file_name) + return os.path.join(os.path.dirname(__file__), 'xml', file_name) def get_validation_errors(self, validator, validation_test_filename): validation_test_filepath = self._get_file_path(validation_test_filename) @@ -45,13 +46,13 @@ def test_01_dataset_fail_iso19139_schema(self): def test_02_dataset_fail_constraints_schematron(self): errors = self.get_validation_errors(validation.ConstraintsSchematron14, - 'gemini2.1/validation/02_Dataset_Invalid_19139_Missing_Data_Format.xml') + 'gemini2.1/validation/02_Dataset_Invalid_19139_Missing_Data_Format.xml') assert len(errors) > 0 assert_in('MD_Distribution / MD_Format: count(distributionFormat + distributorFormat) > 0', errors) def test_03_dataset_fail_gemini_schematron(self): errors = self.get_validation_errors(validation.Gemini2Schematron, - 'gemini2.1/validation/03_Dataset_Invalid_GEMINI_Missing_Keyword.xml') + 
'gemini2.1/validation/03_Dataset_Invalid_GEMINI_Missing_Keyword.xml') assert len(errors) > 0 assert_in('Descriptive keywords are mandatory', errors) @@ -71,20 +72,20 @@ def test_04_dataset_valid(self): def test_05_series_fail_iso19139_schema(self): errors = self.get_validation_errors(validation.ISO19139EdenSchema, - 'gemini2.1/validation/05_Series_Invalid_XSD_No_Such_Element.xml') + 'gemini2.1/validation/05_Series_Invalid_XSD_No_Such_Element.xml') assert len(errors) > 0 assert_in('(gmx.xsd)', errors) assert_in('\'{http://www.isotc211.org/2005/gmd}nosuchelement\': This element is not expected.', errors) def test_06_series_fail_constraints_schematron(self): errors = self.get_validation_errors(validation.ConstraintsSchematron14, - 'gemini2.1/validation/06_Series_Invalid_19139_Missing_Data_Format.xml') + 'gemini2.1/validation/06_Series_Invalid_19139_Missing_Data_Format.xml') assert len(errors) > 0 assert_in('MD_Distribution / MD_Format: count(distributionFormat + distributorFormat) > 0', errors) def test_07_series_fail_gemini_schematron(self): errors = self.get_validation_errors(validation.Gemini2Schematron, - 'gemini2.1/validation/07_Series_Invalid_GEMINI_Missing_Keyword.xml') + 'gemini2.1/validation/07_Series_Invalid_GEMINI_Missing_Keyword.xml') assert len(errors) > 0 assert_in('Descriptive keywords are mandatory', errors) @@ -93,22 +94,23 @@ def test_08_series_valid(self): def test_09_service_fail_iso19139_schema(self): errors = self.get_validation_errors(validation.ISO19139EdenSchema, - 'gemini2.1/validation/09_Service_Invalid_No_Such_Element.xml') + 'gemini2.1/validation/09_Service_Invalid_No_Such_Element.xml') assert len(errors) > 0 assert_in('(gmx.xsd & srv.xsd)', errors) assert_in('\'{http://www.isotc211.org/2005/gmd}nosuchelement\': This element is not expected.', errors) def test_10_service_fail_constraints_schematron(self): errors = self.get_validation_errors(validation.ConstraintsSchematron14, - 'gemini2.1/validation/10_Service_Invalid_19139_Level_Description.xml') + 'gemini2.1/validation/10_Service_Invalid_19139_Level_Description.xml') assert len(errors) > 0 assert_in("DQ_Scope: 'levelDescription' is mandatory if 'level' notEqual 'dataset' or 'series'.", errors) def test_11_service_fail_gemini_schematron(self): errors = self.get_validation_errors(validation.Gemini2Schematron, - 'gemini2.1/validation/11_Service_Invalid_GEMINI_Service_Type.xml') + 'gemini2.1/validation/11_Service_Invalid_GEMINI_Service_Type.xml') assert len(errors) > 0 - assert_in("Service type shall be one of 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other' following INSPIRE generic names.", errors) + assert_in("Service type shall be one of 'discovery', 'view', 'download', 'transformation', " + "'invoke' or 'other' following INSPIRE generic names.", errors) def test_12_service_valid(self): self.assert_passes_all_gemini2_1_validation('gemini2.1/validation/12_Service_Valid.xml') @@ -116,7 +118,7 @@ def test_12_service_valid(self): def test_13_dataset_fail_iso19139_schema_2(self): # This test Dataset has srv tags and only Service metadata should. 
errors = self.get_validation_errors(validation.ISO19139EdenSchema, - 'gemini2.1/validation/13_Dataset_Invalid_Element_srv.xml') + 'gemini2.1/validation/13_Dataset_Invalid_Element_srv.xml') assert len(errors) > 0 assert_in('(gmx.xsd)', errors) assert_in('Element \'{http://www.isotc211.org/2005/srv}SV_ServiceIdentification\': This element is not expected.', errors) @@ -140,7 +142,6 @@ def test_schematron_error_extraction(self): assert_in("/*[local-name()='MD_Metadata'", details) assert_in("Service type shall be one of 'discovery'", details) - def test_error_line_numbers(self): file_path = self._get_file_path('iso19139/dataset-invalid.xml') xml = etree.parse(file_path) diff --git a/ckanext/spatial/tests/xml_file_server.py b/ckanext/spatial/tests/xml_file_server.py index 31e62f07..9b7e0afb 100644 --- a/ckanext/spatial/tests/xml_file_server.py +++ b/ckanext/spatial/tests/xml_file_server.py @@ -7,20 +7,21 @@ PORT = 8999 + def serve(port=PORT): '''Serves test XML files over HTTP''' - + # Make sure we serve from the tests' XML directory os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'xml')) Handler = SimpleHTTPServer.SimpleHTTPRequestHandler - + class TestServer(SocketServer.TCPServer): allow_reuse_address = True - + httpd = TestServer(("", PORT), Handler) - + print 'Serving test HTTP server at port', PORT httpd_thread = Thread(target=httpd.serve_forever) diff --git a/ckanext/spatial/validation/__init__.py b/ckanext/spatial/validation/__init__.py index 8643dccc..2e2033b3 100644 --- a/ckanext/spatial/validation/__init__.py +++ b/ckanext/spatial/validation/__init__.py @@ -5,5 +5,3 @@ except ImportError: import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) - -from validation import * diff --git a/ckanext/spatial/validation/validation.py b/ckanext/spatial/validation/validation.py index 28e8506a..634ac277 100644 --- a/ckanext/spatial/validation/validation.py +++ b/ckanext/spatial/validation/validation.py @@ -316,6 +316,7 @@ def get_schematrons(cls): "xml/gemini2/Gemini2_R1r3.sch") as schema: return [cls.schematron(schema)] + all_validators = (ISO19139Schema, ISO19139EdenSchema, ISO19139NGDCSchema, @@ -338,7 +339,7 @@ def __init__(self, profiles=["iso19139", "constraints", "gemini2"]): self.validators[validator_class.name] = validator_class def add_validator(self, validator_class): - self.validators[validator_class.name] = validator_class + self.validators[validator_class.name] = validator_class def isvalid(self, xml): '''For backward compatibility''' @@ -358,13 +359,12 @@ def is_valid(self, xml): (is_valid, failed_profile_name, [(error_message_string, error_line_number)]) ''' - log.debug('Starting validation against profile(s) %s' % ','.join(self.profiles)) for name in self.profiles: validator = self.validators[name] is_valid, error_message_list = validator.is_valid(xml) if not is_valid: - #error_message_list.insert(0, 'Validating against "%s" profile failed' % validator.title) + # error_message_list.insert(0, 'Validating against "%s" profile failed' % validator.title) log.info('Validating against "%s" profile failed' % validator.title) log.debug('%r', error_message_list) return False, validator.name, error_message_list @@ -372,6 +372,7 @@ def is_valid(self, xml): log.info('Validation passed') return True, None, [] + if __name__ == '__main__': from sys import argv import logging diff --git a/doc/conf.py b/doc/conf.py index ab4097ca..d3c538bb 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -11,17 +11,17 @@ # All configuration values have a default; values that are commented 
out # serve to show the default. -import sys, os +import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. @@ -34,7 +34,7 @@ source_suffix = '.rst' # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' @@ -54,40 +54,40 @@ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False exclude_trees = ['.build'] @@ -102,15 +102,15 @@ html_theme = 'sphinx_rtd_theme' html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] -#html_theme = 'default' +# html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] html_sidebars = { '**': ['globaltoc.html'] } @@ -118,19 +118,19 @@ # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. 
This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -139,44 +139,44 @@ # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'ckanext-spatialdoc' @@ -185,14 +185,14 @@ # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', + # The paper size ('letterpaper' or 'a4paper'). + # 'papersize': 'letterpaper', -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', + # The font size ('10pt', '11pt' or '12pt'). + # 'pointsize': '10pt', -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # Additional stuff for the LaTeX preamble. + # 'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples @@ -204,23 +204,23 @@ # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. 
-#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output -------------------------------------------- @@ -233,14 +233,14 @@ ] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, -# dir menu entry, description, category) +# dir menu entry, description, category) texinfo_documents = [ ('index', 'ckanext-spatial', u'ckanext-spatial Documentation', u'Open Knowledge Foundation', 'ckanext-spatial', 'One line description of project.', @@ -248,16 +248,16 @@ ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False # Example configuration for intersphinx: refer to the Python standard library. From c1b6111c7ae6b8e28d8d0311d7cd9675c9545536 Mon Sep 17 00:00:00 2001 From: Teemu Leivo Date: Tue, 26 Feb 2019 14:31:52 +0200 Subject: [PATCH 02/12] First Flake8 style changes --- ckanext/spatial/harvesters/__init__.py | 4 - ckanext/spatial/harvesters/waf.py | 111 +++++++++----------- ckanext/spatial/lib/__init__.py | 34 +++--- ckanext/spatial/lib/csw_client.py | 38 ++++--- ckanext/spatial/lib/report.py | 10 +- ckanext/spatial/lib/reports.py | 9 +- ckanext/spatial/model/__init__.py | 3 - ckanext/spatial/model/harvested_metadata.py | 10 +- ckanext/spatial/model/package_extent.py | 16 +-- ckanext/spatial/tests/base.py | 17 ++- ckanext/spatial/tests/test_api.py | 1 - ckanext/spatial/tests/test_csw_client.py | 17 ++- ckanext/spatial/tests/test_harvest.py | 89 ++++++++-------- 13 files changed, 179 insertions(+), 180 deletions(-) diff --git a/ckanext/spatial/harvesters/__init__.py b/ckanext/spatial/harvesters/__init__.py index 0093d426..2e2033b3 100644 --- a/ckanext/spatial/harvesters/__init__.py +++ b/ckanext/spatial/harvesters/__init__.py @@ -5,7 +5,3 @@ except ImportError: import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) - -from ckanext.spatial.harvesters.csw import CSWHarvester -from ckanext.spatial.harvesters.waf import WAFHarvester -from ckanext.spatial.harvesters.doc import DocHarvester diff --git a/ckanext/spatial/harvesters/waf.py b/ckanext/spatial/harvesters/waf.py index 488e9603..e9d9d7e3 100644 --- a/ckanext/spatial/harvesters/waf.py +++ b/ckanext/spatial/harvesters/waf.py @@ -5,7 +5,6 @@ import pyparsing as parse import requests from sqlalchemy.orm import aliased -from sqlalchemy.exc import DataError from ckan import model @@ -14,7 +13,6 @@ from ckanext.harvest.interfaces import IHarvester from ckanext.harvest.model import HarvestObject from ckanext.harvest.model import HarvestObjectExtra as HOExtra -import ckanext.harvest.queue as queue from ckanext.spatial.harvesters.base import SpatialHarvester, guess_standard @@ -36,17 +34,15 @@ def info(self): 'description': 'A Web Accessible Folder (WAF) displaying a list of spatial metadata documents' } - def get_original_url(self, harvest_object_id): url = model.Session.query(HOExtra.value).\ - filter(HOExtra.key=='waf_location').\ - 
filter(HOExtra.harvest_object_id==harvest_object_id).\ + filter(HOExtra.key == 'waf_location').\ + filter(HOExtra.harvest_object_id == harvest_object_id).\ first() return url[0] if url else None - - def gather_stage(self,harvest_job,collection_package_id=None): + def gather_stage(self, harvest_job, collection_package_id=None): log = logging.getLogger(__name__ + '.WAF.gather') log.debug('WafHarvester gather_stage for job: %r', harvest_job) @@ -62,46 +58,44 @@ def gather_stage(self,harvest_job,collection_package_id=None): response = requests.get(source_url, timeout=60) response.raise_for_status() except requests.exceptions.RequestException, e: - self._save_gather_error('Unable to get content for URL: %s: %r' % \ - (source_url, e),harvest_job) + self._save_gather_error('Unable to get content for URL: %s: %r' % + (source_url, e), harvest_job) return None content = response.content scraper = _get_scraper(response.headers.get('server')) - ###### Get current harvest object out of db ###### - - url_to_modified_db = {} ## mapping of url to last_modified in db - url_to_ids = {} ## mapping of url to guid in db + # Get current harvest object out of db + url_to_modified_db = {} # mapping of url to last_modified in db + url_to_ids = {} # mapping of url to guid in db HOExtraAlias1 = aliased(HOExtra) HOExtraAlias2 = aliased(HOExtra) query = model.Session.query(HarvestObject.guid, HarvestObject.package_id, HOExtraAlias1.value, HOExtraAlias2.value).\ - join(HOExtraAlias1, HarvestObject.extras).\ - join(HOExtraAlias2, HarvestObject.extras).\ - filter(HOExtraAlias1.key=='waf_modified_date').\ - filter(HOExtraAlias2.key=='waf_location').\ - filter(HarvestObject.current==True).\ - filter(HarvestObject.harvest_source_id==harvest_job.source.id) - + join(HOExtraAlias1, HarvestObject.extras).\ + join(HOExtraAlias2, HarvestObject.extras).\ + filter(HOExtraAlias1.key == 'waf_modified_date').\ + filter(HOExtraAlias2.key == 'waf_location').\ + filter(HarvestObject.current == True).\ + filter(HarvestObject.harvest_source_id == harvest_job.source.id) for guid, package_id, modified_date, url in query: url_to_modified_db[url] = modified_date url_to_ids[url] = (guid, package_id) - ###### Get current list of records from source ###### + # Get current list of records from source - url_to_modified_harvest = {} ## mapping of url to last_modified in harvest + url_to_modified_harvest = {} # mapping of url to last_modified in harvest try: - for url, modified_date in _extract_waf(content,source_url,scraper): + for url, modified_date in _extract_waf(content, source_url, scraper): url_to_modified_harvest[url] = modified_date - except Exception,e: + except Exception, e: msg = 'Error extracting URLs from %s, error was %s' % (source_url, e) - self._save_gather_error(msg,harvest_job) + self._save_gather_error(msg, harvest_job) return None - ###### Compare source and db ###### + # Compare source and db harvest_locations = set(url_to_modified_harvest.keys()) old_locations = set(url_to_modified_db.keys()) @@ -112,9 +106,9 @@ def gather_stage(self,harvest_job,collection_package_id=None): change = [] for item in possible_changes: - if (not url_to_modified_harvest[item] or not url_to_modified_db[item] #if there is no date assume change + if (not url_to_modified_harvest[item] or not url_to_modified_db[item] # if there is no date assume change or url_to_modified_harvest[item] > url_to_modified_db[item]): - change.append(item) + change.append(item) def create_extras(url, date, status): extras = [HOExtra(key='waf_modified_date', value=date), @@ 
-127,16 +121,15 @@ def create_extras(url, date, status): ) return extras - ids = [] for location in new: - guid=hashlib.md5(location.encode('utf8','ignore')).hexdigest() + guid = hashlib.md5(location.encode('utf8', 'ignore')).hexdigest() obj = HarvestObject(job=harvest_job, extras=create_extras(location, url_to_modified_harvest[location], 'new'), guid=guid - ) + ) obj.save() ids.append(obj.id)
@@ -147,19 +140,19 @@ def create_extras(url, date, status): 'change'), guid=url_to_ids[location][0], package_id=url_to_ids[location][1], - ) + ) obj.save() ids.append(obj.id) for location in delete: obj = HarvestObject(job=harvest_job, - extras=create_extras('','', 'delete'), + extras=create_extras('', '', 'delete'), guid=url_to_ids[location][0], package_id=url_to_ids[location][1], - ) + ) model.Session.query(HarvestObject).\ - filter_by(guid=url_to_ids[location][0]).\ - update({'current': False}, False) + filter_by(guid=url_to_ids[location][0]).\ + update({'current': False}, False) obj.save() ids.append(obj.id)
@@ -169,14 +162,13 @@ def create_extras(url, date, status): len(ids), len(new), len(change), len(delete))) return ids else: - self._save_gather_error('No records to change', - harvest_job) + self._save_gather_error('No records to change', harvest_job) return [] def fetch_stage(self, harvest_object): # Check harvest object status - status = self._get_object_extra(harvest_object,'status') + status = self._get_object_extra(harvest_object, 'status') if status == 'delete': # No need to fetch anything, just pass to the import stage
@@ -221,27 +213,27 @@ def fetch_stage(self, harvest_object): return True
-apache = parse.SkipTo(parse.CaselessLiteral("</a>"), include=True).suppress() \ - + parse.Optional(parse.Literal('</td><td align="right">')).suppress() \ - + parse.Optional(parse.Combine( +apache = parse.SkipTo(parse.CaselessLiteral("</a>"), include=True).suppress() \ + + parse.Optional(parse.Literal('</td><td align="right">')).suppress() \ + + parse.Optional(parse.Combine( parse.Word(parse.alphanums+'-') + - parse.Word(parse.alphanums+':') - ,adjacent=False, joinString=' ').setResultsName('date') + parse.Word(parse.alphanums+':'), + adjacent=False, joinString=' ').setResultsName('date') )
-iis = parse.SkipTo("
").suppress() \ - + parse.OneOrMore("
").suppress() \ - + parse.Optional(parse.Combine( - parse.Word(parse.alphanums+'/') + - parse.Word(parse.alphanums+':') + - parse.Word(parse.alphas) - , adjacent=False, joinString=' ').setResultsName('date') - ) \ - + parse.Word(parse.nums).suppress() \ - + parse.Literal('
").suppress() \ + + parse.OneOrMore("
").suppress() \ + + parse.Optional(parse.Combine( + parse.Word(parse.alphanums+'/') + + parse.Word(parse.alphanums+':') + + parse.Word(parse.alphas), + adjacent=False, joinString=' ').setResultsName('date') + ) \ + + parse.Word(parse.nums).suppress() \ + + parse.Literal('
Date: Tue, 26 Feb 2019 18:21:46 +0200 Subject: [PATCH 03/12] More Flake8 style changes --- ckanext/spatial/commands/__init__.py | 2 +- ckanext/spatial/commands/csw.py | 14 +- ckanext/spatial/commands/spatial.py | 35 +++-- ckanext/spatial/commands/validation.py | 11 +- ckanext/spatial/controllers/__init__.py | 1 - ckanext/spatial/harvesters/base.py | 102 +++++++------- ckanext/spatial/harvesters/csw.py | 18 ++- ckanext/spatial/harvesters/doc.py | 42 +++--- ckanext/spatial/harvesters/gemini.py | 168 +++++++++++------------- ckanext/spatial/plugin.py | 65 ++++----- 10 files changed, 217 insertions(+), 241 deletions(-) diff --git a/ckanext/spatial/commands/__init__.py b/ckanext/spatial/commands/__init__.py index d2547b8d..267f7100 100644 --- a/ckanext/spatial/commands/__init__.py +++ b/ckanext/spatial/commands/__init__.py @@ -3,4 +3,4 @@ pkg_resources.declare_namespace(__name__) except ImportError: import pkgutil - __path__ = pkgutil.extend_path(__path__, __name__) \ No newline at end of file + __path__ = pkgutil.extend_path(__path__, __name__) diff --git a/ckanext/spatial/commands/csw.py b/ckanext/spatial/commands/csw.py index 88517a6b..f1899922 100644 --- a/ckanext/spatial/commands/csw.py +++ b/ckanext/spatial/commands/csw.py @@ -5,6 +5,8 @@ from paste import script log = logging.getLogger(__name__) + + class Pycsw(script.command.Command): '''Manages the CKAN-pycsw integration @@ -35,10 +37,14 @@ class Pycsw(script.command.Command): ''' parser = script.command.Command.standard_parser(verbose=True) - parser.add_option('-p', '--pycsw-config', dest='pycsw_config', - default='default.cfg', help='pycsw config file to use.') - parser.add_option('-u', '--ckan-url', dest='ckan_url', - default='http://localhost', help='CKAN instance to import the datasets from.') + parser.add_option('-p', '--pycsw-config', + dest='pycsw_config', + default='default.cfg', + help='pycsw config file to use.') + parser.add_option('-u', '--ckan-url', + dest='ckan_url', + default='http://localhost', + help='CKAN instance to import the datasets from.') summary = __doc__.split('\n')[0] usage = __doc__ diff --git a/ckanext/spatial/commands/spatial.py b/ckanext/spatial/commands/spatial.py index 8f75af6a..bf8dbe03 100644 --- a/ckanext/spatial/commands/spatial.py +++ b/ckanext/spatial/commands/spatial.py @@ -1,6 +1,4 @@ import sys -import re -from pprint import pprint import logging from ckan.lib.cli import CkanCommand @@ -8,6 +6,7 @@ from ckanext.spatial.lib import save_package_extent log = logging.getLogger(__name__) + class Spatial(CkanCommand): '''Performs spatially related operations. @@ -20,7 +19,7 @@ class Spatial(CkanCommand): spatial extents Creates or updates the extent geometry column for datasets with an extent defined in the 'spatial' extra. - + The commands should be run from the ckanext-spatial directory and expect a development.ini file to be present. 
Most of the time you will specify the config explicitly though:: @@ -31,7 +30,7 @@ class Spatial(CkanCommand): summary = __doc__.split('\n')[0] usage = __doc__ - max_args = 2 + max_args = 2 min_args = 0 def command(self): @@ -43,7 +42,7 @@ def command(self): sys.exit(1) cmd = self.args[0] if cmd == 'initdb': - self.initdb() + self.initdb() elif cmd == 'extents': self.update_extents() else: @@ -56,16 +55,16 @@ def initdb(self): srid = None from ckanext.spatial.model import setup as db_setup - + db_setup(srid) print 'DB tables created' def update_extents(self): - from ckan.model import PackageExtra, Package, Session - conn = Session.connection() - packages = [extra.package \ - for extra in \ + from ckan.model import PackageExtra, Session + Session.connection() + packages = [extra.package + for extra in Session.query(PackageExtra).filter(PackageExtra.key == 'spatial').all()] errors = [] @@ -77,21 +76,19 @@ def update_extents(self): geometry = json.loads(value) count += 1 - except ValueError,e: - errors.append(u'Package %s - Error decoding JSON object: %s' % (package.id,str(e))) - except TypeError,e: - errors.append(u'Package %s - Error decoding JSON object: %s' % (package.id,str(e))) + except ValueError, e: + errors.append(u'Package %s - Error decoding JSON object: %s' % (package.id, str(e))) + except TypeError, e: + errors.append(u'Package %s - Error decoding JSON object: %s' % (package.id, str(e))) - save_package_extent(package.id,geometry) - + save_package_extent(package.id, geometry) Session.commit() - + if errors: msg = 'Errors were found:\n%s' % '\n'.join(errors) print msg - msg = "Done. Extents generated for %i out of %i packages" % (count,len(packages)) + msg = "Done. Extents generated for %i out of %i packages" % (count, len(packages)) print msg - diff --git a/ckanext/spatial/commands/validation.py b/ckanext/spatial/commands/validation.py index b2619673..ecd809ca 100644 --- a/ckanext/spatial/commands/validation.py +++ b/ckanext/spatial/commands/validation.py @@ -1,5 +1,4 @@ import sys -import re import os from pprint import pprint import logging @@ -10,6 +9,7 @@ log = logging.getLogger(__name__) + class Validation(CkanCommand): '''Validation commands @@ -21,7 +21,7 @@ class Validation(CkanCommand): validation report-csv .csv Performs validation on all the harvested metadata in the db and writes a report in CSV format to the given filepath. - + validation file .xml Performs validation on the given metadata file. 
''' @@ -49,7 +49,6 @@ def command(self): def report(self): from ckan import model - from ckanext.harvest.model import HarvestObject from ckanext.spatial.lib.reports import validation_report if len(self.args) >= 2: @@ -92,7 +91,7 @@ def validate_file(self): print 'ERROR: Unicode Error reading file \'%s\': %s' % \ (metadata_filepath, e) sys.exit(1) - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() xml = etree.fromstring(xml_string) # XML validation @@ -102,11 +101,11 @@ def validate_file(self): if valid: try: iso_document = ISODocument(xml_string) - iso_values = iso_document.read_values() + iso_document.read_values() except Exception, e: valid = False errors.append('CKAN exception reading values from ISODocument: %s' % e) - + print '***************' print 'Summary' print '***************' diff --git a/ckanext/spatial/controllers/__init__.py b/ckanext/spatial/controllers/__init__.py index d0ed2fca..267f7100 100644 --- a/ckanext/spatial/controllers/__init__.py +++ b/ckanext/spatial/controllers/__init__.py @@ -4,4 +4,3 @@ except ImportError: import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) - diff --git a/ckanext/spatial/harvesters/base.py b/ckanext/spatial/harvesters/base.py index 4bac371e..82a19033 100644 --- a/ckanext/spatial/harvesters/base.py +++ b/ckanext/spatial/harvesters/base.py @@ -92,7 +92,7 @@ def guess_resource_format(url, use_mimetypes=True): return resource_type file_types = { - 'kml' : ('kml',), + 'kml': ('kml',), 'kmz': ('kmz',), 'gml': ('gml',), } @@ -122,7 +122,7 @@ class SpatialHarvester(HarvesterBase): {"type": "Polygon", "coordinates": [[[$xmin, $ymin], [$xmax, $ymin], [$xmax, $ymax], [$xmin, $ymax], [$xmin, $ymin]]]} ''') - ## IHarvester + # IHarvester def validate_config(self, source_config): if not source_config: @@ -143,16 +143,16 @@ def validate_config(self, source_config): raise ValueError('Unknown validation profile(s): %s' % ','.join(unknown_profiles)) if 'default_tags' in source_config_obj: - if not isinstance(source_config_obj['default_tags'],list): + if not isinstance(source_config_obj['default_tags'], list): raise ValueError('default_tags must be a list') if 'default_extras' in source_config_obj: - if not isinstance(source_config_obj['default_extras'],dict): + if not isinstance(source_config_obj['default_extras'], dict): raise ValueError('default_extras must be a dictionary') for key in ('override_extras', 'clean_tags'): if key in source_config_obj: - if not isinstance(source_config_obj[key],bool): + if not isinstance(source_config_obj[key], bool): raise ValueError('%s must be boolean' % key) except ValueError, e: @@ -160,10 +160,7 @@ def validate_config(self, source_config): return source_config - ## - - ## SpatialHarvester - + # SpatialHarvester def get_package_dict(self, iso_values, harvest_object): ''' @@ -203,7 +200,7 @@ def get_package_dict(self, context, data_dict): :returns: A dataset dictionary (package_dict) :rtype: dict ''' - + tags = [] if 'tags' in iso_values: @@ -287,7 +284,6 @@ def _extract_first_license_url(licences): if license_url_extracted: extras['licence_url'] = license_url_extracted - # Metadata license ID check for package use_constraints = iso_values.get('use-constraints') if use_constraints: @@ -307,7 +303,6 @@ def _extract_first_license_url(licences): package_dict['license_id'] = package_license break - extras['access_constraints'] = iso_values.get('limitations-on-public-access', '') # Grpahic preview @@ -320,7 +315,6 @@ def _extract_first_license_url(licences): if browse_graphic.get('type'): 
extras['graphic-preview-type'] = browse_graphic.get('type') - for key in ['temporal-extent-begin', 'temporal-extent-end']: if len(iso_values[key]) > 0: extras[key] = iso_values[key][0] @@ -350,7 +344,7 @@ def _extract_first_license_url(licences): ymax = float(bbox['north']) except ValueError, e: self._save_object_error('Error parsing bounding box value: {0}'.format(str(e)), - harvest_object, 'Import') + harvest_object, 'Import') else: # Construct a GeoJSON extent so ckanext-spatial can register the extent geometry @@ -361,7 +355,7 @@ def _extract_first_license_url(licences): x=xmin, y=ymin ) self._save_object_error('Point extent defined instead of polygon', - harvest_object, 'Import') + harvest_object, 'Import') else: extent_string = self.extent_template.substitute( xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax @@ -391,28 +385,27 @@ def _extract_first_license_url(licences): { 'url': url, 'name': resource_locator.get('name') or p.toolkit._('Unnamed resource'), - 'description': resource_locator.get('description') or '', + 'description': resource_locator.get('description') or '', 'resource_locator_protocol': resource_locator.get('protocol') or '', 'resource_locator_function': resource_locator.get('function') or '', }) package_dict['resources'].append(resource) - # Add default_extras from config - default_extras = self.source_config.get('default_extras',{}) + default_extras = self.source_config.get('default_extras', {}) if default_extras: - override_extras = self.source_config.get('override_extras',False) - for key,value in default_extras.iteritems(): - log.debug('Processing extra %s', key) - if not key in extras or override_extras: - # Look for replacement strings - if isinstance(value,basestring): - value = value.format(harvest_source_id=harvest_object.job.source.id, - harvest_source_url=harvest_object.job.source.url.strip('/'), - harvest_source_title=harvest_object.job.source.title, - harvest_job_id=harvest_object.job.id, - harvest_object_id=harvest_object.id) - extras[key] = value + override_extras = self.source_config.get('override_extras', False) + for key, value in default_extras.iteritems(): + log.debug('Processing extra %s', key) + if not key in extras or override_extras: + # Look for replacement strings + if isinstance(value, basestring): + value = value.format(harvest_source_id=harvest_object.job.source.id, + harvest_source_url=harvest_object.job.source.url.strip('/'), + harvest_source_title=harvest_object.job.source.title, + harvest_job_id=harvest_object.job.id, + harvest_object_id=harvest_object.id) + extras[key] = value extras_as_dict = [] for key, value in extras.iteritems(): @@ -456,9 +449,9 @@ def import_stage(self, harvest_object): # Get the last harvested object (if any) previous_object = model.Session.query(HarvestObject) \ - .filter(HarvestObject.guid==harvest_object.guid) \ - .filter(HarvestObject.current==True) \ - .first() + .filter(HarvestObject.guid == harvest_object.guid) \ + .filter(HarvestObject.current == True) \ + .first() if status == 'delete': # Delete package @@ -474,7 +467,7 @@ def import_stage(self, harvest_object): original_document = self._get_object_extra(harvest_object, 'original_document') original_format = self._get_object_extra(harvest_object, 'original_format') if original_document and original_format: - #DEPRECATED use the ISpatialHarvester interface method + # DEPRECATED use the ISpatialHarvester interface method self.__base_transform_to_iso_called = False content = self.transform_to_iso(original_document, original_format, harvest_object) if not 
self.__base_transform_to_iso_called: @@ -525,12 +518,12 @@ def import_stage(self, harvest_object): # First make sure there already aren't current objects # with the same guid existing_object = model.Session.query(HarvestObject.id) \ - .filter(HarvestObject.guid==iso_guid) \ - .filter(HarvestObject.current==True) \ + .filter(HarvestObject.guid == iso_guid) \ + .filter(HarvestObject.current == True) \ .first() if existing_object: self._save_object_error('Object {0} already has this guid {1}'.format(existing_object.id, iso_guid), - harvest_object, 'Import') + harvest_object, 'Import') return False harvest_object.guid = iso_guid @@ -548,13 +541,12 @@ def import_stage(self, harvest_object): metadata_modified_date = dateutil.parser.parse(iso_values['metadata-date'], ignoretz=True) except ValueError: self._save_object_error('Could not extract reference date for object {0} ({1})' - .format(harvest_object.id, iso_values['metadata-date']), harvest_object, 'Import') + .format(harvest_object.id, iso_values['metadata-date']), harvest_object, 'Import') return False harvest_object.metadata_modified_date = metadata_modified_date harvest_object.add() - # Build the package dict package_dict = self.get_package_dict(iso_values, harvest_object) for harvester in p.PluginImplementations(ISpatialHarvester): @@ -577,7 +569,6 @@ def import_stage(self, harvest_object): if self._site_user and context['user'] == self._site_user['name']: context['ignore_auth'] = True - # The default package schema does not like Upper case tags tag_schema = logic.schema.default_tags_schema() tag_schema['name'] = [not_empty, unicode] @@ -630,19 +621,19 @@ def import_stage(self, harvest_object): if ((config.get('ckanext.spatial.harvest.reindex_unchanged', True) != 'False' or self.source_config.get('reindex_unchanged') != 'False') and harvest_object.package_id): - context.update({'validate': False, 'ignore_auth': True}) - try: - package_dict = logic.get_action('package_show')(context, - {'id': harvest_object.package_id}) - except p.toolkit.ObjectNotFound: - pass - else: - for extra in package_dict.get('extras', []): - if extra['key'] == 'harvest_object_id': - extra['value'] = harvest_object.id - if package_dict: - package_index = PackageSearchIndex() - package_index.index_package(package_dict) + context.update({'validate': False, 'ignore_auth': True}) + try: + package_dict = logic.get_action('package_show')(context, + {'id': harvest_object.package_id}) + except p.toolkit.ObjectNotFound: + pass + else: + for extra in package_dict.get('extras', []): + if extra['key'] == 'harvest_object_id': + extra['value'] = harvest_object.id + if package_dict: + package_index = PackageSearchIndex() + package_index.index_package(package_dict) log.info('Document with GUID %s unchanged, skipping...' 
% (harvest_object.guid)) else: @@ -729,7 +720,6 @@ def _get_validator(self): if custom_validator not in all_validators: self._validator.add_validator(custom_validator) - return self._validator def _get_user_name(self): @@ -750,8 +740,8 @@ def _get_user_name(self): context = {'model': model, 'ignore_auth': True, - 'defer_commit': True, # See ckan/ckan#1714 - } + 'defer_commit': True, # See ckan/ckan#1714 + } self._site_user = p.toolkit.get_action('get_site_user')(context, {}) config_user_name = config.get('ckanext.spatial.harvest.user_name') diff --git a/ckanext/spatial/harvesters/csw.py b/ckanext/spatial/harvesters/csw.py index 2853a10c..a319146d 100644 --- a/ckanext/spatial/harvesters/csw.py +++ b/ckanext/spatial/harvesters/csw.py @@ -77,8 +77,8 @@ def gather_stage(self, harvest_job): return None query = model.Session.query(HarvestObject.guid, HarvestObject.package_id).\ - filter(HarvestObject.current==True).\ - filter(HarvestObject.harvest_source_id==harvest_job.source.id) + filter(HarvestObject.current == True).\ + filter(HarvestObject.harvest_source_id == harvest_job.source.id) guid_to_package_id = {} for guid, package_id in query: @@ -101,10 +101,9 @@ def gather_stage(self, harvest_job): guids_in_harvest.add(identifier) except Exception, e: - self._save_gather_error('Error for the identifier %s [%r]' % (identifier,e), harvest_job) + self._save_gather_error('Error for the identifier %s [%r]' % (identifier, e), harvest_job) continue - except Exception, e: log.error('Exception: %s' % text_traceback()) self._save_gather_error('Error gathering the identifiers from the CSW server [%s]' % str(e), harvest_job) @@ -131,8 +130,8 @@ def gather_stage(self, harvest_job): package_id=guid_to_package_id[guid], extras=[HOExtra(key='status', value='delete')]) model.Session.query(HarvestObject).\ - filter_by(guid=guid).\ - update({'current': False}, False) + filter_by(guid=guid).\ + update({'current': False}, False) obj.save() ids.append(obj.id) @@ -142,7 +141,7 @@ def gather_stage(self, harvest_job): return ids - def fetch_stage(self,harvest_object): + def fetch_stage(self, harvest_object): # Check harvest object status status = self._get_object_extra(harvest_object, 'status') @@ -182,8 +181,8 @@ def fetch_stage(self,harvest_object): harvest_object.content = content.strip() harvest_object.save() - except Exception,e: - self._save_object_error('Error saving the harvest object for GUID %s [%r]' % \ + except Exception, e: + self._save_object_error('Error saving the harvest object for GUID %s [%r]' % (identifier, e), harvest_object) return False @@ -192,4 +191,3 @@ def fetch_stage(self,harvest_object): def _setup_csw_client(self, url): self.csw = CswService(url) - diff --git a/ckanext/spatial/harvesters/doc.py b/ckanext/spatial/harvesters/doc.py index e8a6daae..3b71893c 100644 --- a/ckanext/spatial/harvesters/doc.py +++ b/ckanext/spatial/harvesters/doc.py @@ -27,18 +27,16 @@ def info(self): 'description': 'A single spatial metadata document' } - def get_original_url(self, harvest_object_id): obj = model.Session.query(HarvestObject).\ - filter(HarvestObject.id==harvest_object_id).\ + filter(HarvestObject.id == harvest_object_id).\ first() if not obj: return None return obj.source.url - - def gather_stage(self,harvest_job): + def gather_stage(self, harvest_job): log = logging.getLogger(__name__ + '.individual.gather') log.debug('DocHarvester gather_stage for job: %r', harvest_job) @@ -52,34 +50,32 @@ def gather_stage(self,harvest_job): # Get contents try: content = self._get_content_as_unicode(url) - except 
Exception,e: - self._save_gather_error('Unable to get content for URL: %s: %r' % \ - (url, e),harvest_job) + except Exception, e: + self._save_gather_error('Unable to get content for URL: %s: %r' % + (url, e), harvest_job) return None existing_object = model.Session.query(HarvestObject.guid, HarvestObject.package_id).\ - filter(HarvestObject.current==True).\ - filter(HarvestObject.harvest_source_id==harvest_job.source.id).\ - first() + filter(HarvestObject.current == True).\ + filter(HarvestObject.harvest_source_id == harvest_job.source.id).\ + first() def create_extras(url, status): return [HOExtra(key='doc_location', value=url), HOExtra(key='status', value=status)] if not existing_object: - guid=hashlib.md5(url.encode('utf8', 'ignore')).hexdigest() + guid = hashlib.md5(url.encode('utf8', 'ignore')).hexdigest() harvest_object = HarvestObject(job=harvest_job, - extras=create_extras(url, - 'new'), - guid=guid - ) + extras=create_extras(url, 'new'), + guid=guid + ) else: harvest_object = HarvestObject(job=harvest_job, - extras=create_extras(url, - 'change'), - guid=existing_object.guid, - package_id=existing_object.package_id - ) + extras=create_extras(url, 'change'), + guid=existing_object.guid, + package_id=existing_object.package_id + ) harvest_object.add() @@ -104,10 +100,6 @@ def create_extras(url, status): return [harvest_object.id] - - - - def fetch_stage(self,harvest_object): + def fetch_stage(self, harvest_object): # The fetching was already done in the previous stage return True - diff --git a/ckanext/spatial/harvesters/gemini.py b/ckanext/spatial/harvesters/gemini.py index 8dc65d45..0ed9ba36 100644 --- a/ckanext/spatial/harvesters/gemini.py +++ b/ckanext/spatial/harvesters/gemini.py @@ -52,7 +52,6 @@ class GeminiHarvester(SpatialHarvester): All three harvesters share the same import stage ''' - def import_stage(self, harvest_object): log = logging.getLogger(__name__ + '.import') log.debug('Import stage for harvest object: %r', harvest_object) @@ -65,7 +64,7 @@ def import_stage(self, harvest_object): self.obj = harvest_object if harvest_object.content is None: - self._save_object_error('Empty content for object %s' % harvest_object.id,harvest_object,'Import') + self._save_object_error('Empty content for object %s' % harvest_object.id, harvest_object, 'Import') return False try: self.import_gemini_object(harvest_object.content) @@ -95,13 +94,12 @@ def import_gemini_object(self, gemini_string): if not valid: out = errors[0][0] + ':\n' + '\n'.join(e[0] for e in errors[1:]) log.error('Errors found for object with GUID %s:' % self.obj.guid) - self._save_object_error(out,self.obj,'Import') + self._save_object_error(out, self.obj, 'Import') unicode_gemini_string = etree.tostring(xml, encoding=unicode, pretty_print=True) # may raise Exception for errors - package_dict = self.write_package_from_gemini_string(unicode_gemini_string) - + self.write_package_from_gemini_string(unicode_gemini_string) def write_package_from_gemini_string(self, content): '''Create or update a Package based on some content that has @@ -118,26 +116,26 @@ def write_package_from_gemini_string(self, content): # Save the metadata reference date in the Harvest Object try: - metadata_modified_date = datetime.strptime(gemini_values['metadata-date'],'%Y-%m-%d') + metadata_modified_date = datetime.strptime(gemini_values['metadata-date'], '%Y-%m-%d') except ValueError: try: - metadata_modified_date = datetime.strptime(gemini_values['metadata-date'],'%Y-%m-%dT%H:%M:%S') + metadata_modified_date = 
datetime.strptime(gemini_values['metadata-date'], '%Y-%m-%dT%H:%M:%S') except: - raise Exception('Could not extract reference date for GUID %s (%s)' \ - % (gemini_guid,gemini_values['metadata-date'])) + raise Exception('Could not extract reference date for GUID %s (%s)' % + (gemini_guid, gemini_values['metadata-date'])) self.obj.metadata_modified_date = metadata_modified_date self.obj.save() last_harvested_object = Session.query(HarvestObject) \ - .filter(HarvestObject.guid==gemini_guid) \ - .filter(HarvestObject.current==True) \ - .all() + .filter(HarvestObject.guid == gemini_guid) \ + .filter(HarvestObject.current == True) \ + .all() if len(last_harvested_object) == 1: last_harvested_object = last_harvested_object[0] elif len(last_harvested_object) > 1: - raise Exception('Application Error: more than one current record for GUID %s' % gemini_guid) + raise Exception('Application Error: more than one current record for GUID %s' % gemini_guid) reactivate_package = False if last_harvested_object: @@ -152,7 +150,7 @@ def write_package_from_gemini_string(self, content): last_harvested_object.source.active is False): if self.force_import: - log.info('Import forced for object %s with GUID %s' % (self.obj.id,gemini_guid)) + log.info('Import forced for object %s with GUID %s' % (self.obj.id, gemini_guid)) else: log.info('Package for object with GUID %s needs to be created or updated' % gemini_guid) @@ -165,8 +163,8 @@ def write_package_from_gemini_string(self, content): log.info('Package for object with GUID %s will be re-activated' % gemini_guid) reactivate_package = True else: - log.info('Remote record with GUID %s is not more recent than a deleted package, skipping... ' % gemini_guid) - return None + log.info('Remote record with GUID %s is not more recent than a deleted package, skipping... ' % gemini_guid) + return None else: if last_harvested_object.content != self.obj.content and \ @@ -175,7 +173,8 @@ def write_package_from_gemini_string(self, content): last_harvested_object.content.split('\n'), self.obj.content.split('\n')) diff = '\n'.join([line for line in diff_generator]) - raise Exception('The contents of document with GUID %s changed, but the metadata date has not been updated.\nDiff:\n%s' % (gemini_guid, diff)) + raise Exception('The contents of document with GUID %s changed, ' + 'but the metadata date has not been updated.\nDiff:\n%s' % (gemini_guid, diff)) else: # The content hasn't changed, no need to update the package log.info('Document with GUID %s unchanged, skipping...' 
% (gemini_guid)) @@ -195,8 +194,8 @@ def write_package_from_gemini_string(self, content): 'guid', # Usefuls 'dataset-reference-date', - 'metadata-language', # Language - 'metadata-date', # Released + 'metadata-language', # Language + 'metadata-date', # Released 'coupled-resource', 'contact-email', 'frequency-of-update', @@ -222,12 +221,12 @@ def write_package_from_gemini_string(self, content): if licence_url_extracted: extras['licence_url'] = licence_url_extracted - extras['access_constraints'] = gemini_values.get('limitations-on-public-access','') + extras['access_constraints'] = gemini_values.get('limitations-on-public-access', '') if gemini_values.has_key('temporal-extent-begin'): - #gemini_values['temporal-extent-begin'].sort() + # gemini_values['temporal-extent-begin'].sort() extras['temporal_coverage-from'] = gemini_values['temporal-extent-begin'] if gemini_values.has_key('temporal-extent-end'): - #gemini_values['temporal-extent-end'].sort() + # gemini_values['temporal-extent-end'].sort() extras['temporal_coverage-to'] = gemini_values['temporal-extent-end'] # Save responsible organization roles @@ -236,7 +235,7 @@ def write_package_from_gemini_string(self, content): extras['provider'] = provider extras['responsible-party'] = '; '.join(responsible_parties) - if len(gemini_values['bbox']) >0: + if len(gemini_values['bbox']) > 0: extras['bbox-east-long'] = gemini_values['bbox'][0]['east'] extras['bbox-north-lat'] = gemini_values['bbox'][0]['north'] extras['bbox-south-lat'] = gemini_values['bbox'][0]['south'] @@ -244,10 +243,10 @@ def write_package_from_gemini_string(self, content): # Construct a GeoJSON extent so ckanext-spatial can register the extent geometry extent_string = self.extent_template.substitute( - xmin = extras['bbox-east-long'], - ymin = extras['bbox-south-lat'], - xmax = extras['bbox-west-long'], - ymax = extras['bbox-north-lat'] + xmin=extras['bbox-east-long'], + ymin=extras['bbox-south-lat'], + xmax=extras['bbox-west-long'], + ymax=extras['bbox-north-lat'] ) extras['spatial'] = extent_string.strip() @@ -255,18 +254,17 @@ def write_package_from_gemini_string(self, content): tags = [] for tag in gemini_values['tags']: tag = tag[:50] if len(tag) > 50 else tag - tags.append({'name':tag}) + tags.append({'name': tag}) package_dict = { 'title': gemini_values['title'], 'notes': gemini_values['abstract'], 'tags': tags, - 'resources':[] + 'resources': [] } if self.obj.source.publisher_id: - package_dict['groups'] = [{'id':self.obj.source.publisher_id}] - + package_dict['groups'] = [{'id': self.obj.source.publisher_id}] if reactivate_package: package_dict['state'] = u'active' @@ -285,7 +283,7 @@ def write_package_from_gemini_string(self, content): if len(resource_locators): for resource_locator in resource_locators: - url = resource_locator.get('url','') + url = resource_locator.get('url', '') if url: resource_format = '' resource = {} @@ -299,12 +297,12 @@ def write_package_from_gemini_string(self, content): resource.update( { 'url': url, - 'name': resource_locator.get('name',''), - 'description': resource_locator.get('description') if resource_locator.get('description') else 'Resource locator', + 'name': resource_locator.get('name', ''), + 'description': resource_locator.get('description') if resource_locator.get('description') + else 'Resource locator', 'format': resource_format or None, - 'resource_locator_protocol': resource_locator.get('protocol',''), - 'resource_locator_function':resource_locator.get('function','') - + 'resource_locator_protocol': 
resource_locator.get('protocol', ''), + 'resource_locator_function': resource_locator.get('function', '') }) package_dict['resources'].append(resource) @@ -318,11 +316,11 @@ def write_package_from_gemini_string(self, content): view_resources[0]['ckan_recommended_wms_preview'] = True extras_as_dict = [] - for key,value in extras.iteritems(): - if isinstance(value,(basestring,Number)): - extras_as_dict.append({'key':key,'value':value}) + for key, value in extras.iteritems(): + if isinstance(value, (basestring, Number)): + extras_as_dict.append({'key': key, 'value': value}) else: - extras_as_dict.append({'key':key,'value':json.dumps(value)}) + extras_as_dict.append({'key': key, 'value': json.dumps(value)}) package_dict['extras'] = extras_as_dict @@ -331,15 +329,15 @@ def write_package_from_gemini_string(self, content): package = self._create_package_from_data(package_dict) log.info('Created new package ID %s with GEMINI guid %s', package['id'], gemini_guid) else: - package = self._create_package_from_data(package_dict, package = package) + package = self._create_package_from_data(package_dict, package=package) log.info('Updated existing package ID %s with existing GEMINI guid %s', package['id'], gemini_guid) # Flag the other objects of this source as not current anymore from ckanext.harvest.model import harvest_object_table u = update(harvest_object_table) \ - .where(harvest_object_table.c.package_id==bindparam('b_package_id')) \ - .values(current=False) - Session.execute(u, params={'b_package_id':package['id']}) + .where(harvest_object_table.c.package_id == bindparam('b_package_id')) \ + .values(current=False) + Session.execute(u, params={'b_package_id': package['id']}) Session.commit() # Refresh current object from session, otherwise the @@ -427,7 +425,7 @@ def _extract_first_licence_url(self, licences): return licence return None - def _create_package_from_data(self, package_dict, package = None): + def _create_package_from_data(self, package_dict, package=None): ''' {'name': 'council-owned-litter-bins', 'notes': 'Location of Council owned litter bins within Borough.', @@ -453,15 +451,15 @@ def _create_package_from_data(self, package_dict, package = None): # The default package schema does not like Upper case tags tag_schema = logic.schema.default_tags_schema() - tag_schema['name'] = [not_empty,unicode] + tag_schema['name'] = [not_empty, unicode] package_schema['tags'] = tag_schema # TODO: user - context = {'model':model, - 'session':Session, - 'user':'harvest', - 'schema':package_schema, - 'extras_as_string':True, + context = {'model': model, + 'session': Session, + 'user': 'harvest', + 'schema': package_schema, + 'extras_as_string': True, 'api_version': '2'} if not package: # We need to explicitly provide a package ID, otherwise ckanext-spatial @@ -476,14 +474,14 @@ def _create_package_from_data(self, package_dict, package = None): try: package_dict = action_function(context, package_dict) - except ValidationError,e: + except ValidationError, e: raise Exception('Validation Error: %s' % str(e.error_summary)) if debug_exception_mode: raise return package_dict - def get_gemini_string_and_guid(self,content,url=None): + def get_gemini_string_and_guid(self, content, url=None): '''From a string buffer containing Gemini XML, return the tree under gmd:MD_Metadata and the GUID for it. 
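In outline, the GUID lookup this docstring describes is an XPath read of the document's gmd:fileIdentifier. A minimal self-contained sketch of that step, assuming only lxml and the standard ISO 19139 gmd/gco namespace URIs (an illustration, not the GeminiDocument machinery the harvester actually uses):

    from lxml import etree

    # Standard ISO 19139 namespace URIs (assumed here for illustration).
    NSMAP = {
        'gmd': 'http://www.isotc211.org/2005/gmd',
        'gco': 'http://www.isotc211.org/2005/gco',
    }

    def extract_guid(gemini_string):
        # Expects a byte string (or XML without an encoding declaration).
        # The metadata file identifier is what the harvesters use as the
        # harvest object GUID when deduplicating records across jobs.
        tree = etree.fromstring(gemini_string)
        values = tree.xpath('//gmd:fileIdentifier/gco:CharacterString/text()',
                            namespaces=NSMAP)
        guid = values[0].strip() if values else ''
        return guid or None

Returning None for a blank identifier mirrors the behaviour below, where GeminiDocHarvester records a gather error when no GUID can be extracted from the source.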
@@ -516,13 +514,14 @@ def get_gemini_string_and_guid(self,content,url=None): return gemini_string, gemini_guid + class GeminiCswHarvester(GeminiHarvester, SingletonPlugin): ''' A Harvester for CSW servers ''' implements(IHarvester) - csw=None + csw = None def info(self): return { @@ -543,7 +542,6 @@ def gather_stage(self, harvest_job): self._save_gather_error('Error contacting the CSW server: %s' % e, harvest_job) return None - log.debug('Starting gathering for %s' % url) used_identifiers = [] ids = [] @@ -556,7 +554,7 @@ def gather_stage(self, harvest_job): continue if identifier is None: log.error('CSW returned identifier %r, skipping...' % identifier) - ## log an error here? happens with the dutch data + # log an error here? happens with the dutch data continue # Create a new HarvestObject for this identifier @@ -566,7 +564,7 @@ def gather_stage(self, harvest_job): ids.append(obj.id) used_identifiers.append(identifier) except Exception, e: - self._save_gather_error('Error for the identifier %s [%r]' % (identifier,e), harvest_job) + self._save_gather_error('Error for the identifier %s [%r]' % (identifier, e), harvest_job) continue except Exception, e: @@ -580,7 +578,7 @@ def gather_stage(self, harvest_job): return ids - def fetch_stage(self,harvest_object): + def fetch_stage(self, harvest_object): log = logging.getLogger(__name__ + '.CSW.fetch') log.debug('GeminiCswHarvester fetch_stage for object: %r', harvest_object) @@ -608,8 +606,8 @@ def fetch_stage(self,harvest_object): # Save the fetch contents in the HarvestObject harvest_object.content = record['xml'] harvest_object.save() - except Exception,e: - self._save_object_error('Error saving the harvest object for GUID %s [%r]' % \ + except Exception, e: + self._save_object_error('Error saving the harvest object for GUID %s [%r]' % (identifier, e), harvest_object) return False @@ -634,7 +632,7 @@ def info(self): 'description': 'A single GEMINI 2.1 document' } - def gather_stage(self,harvest_job): + def gather_stage(self, harvest_job): log = logging.getLogger(__name__ + '.individual.gather') log.debug('GeminiDocHarvester gather_stage for job: %r', harvest_job) @@ -646,13 +644,13 @@ def gather_stage(self,harvest_job): # Get contents try: content = self._get_content(url) - except Exception,e: - self._save_gather_error('Unable to get content for URL: %s: %r' % \ - (url, e),harvest_job) + except Exception, e: + self._save_gather_error('Unable to get content for URL: %s: %r' % + (url, e), harvest_job) return None try: # We need to extract the guid to pass it to the next stage - gemini_string, gemini_guid = self.get_gemini_string_and_guid(content,url) + gemini_string, gemini_guid = self.get_gemini_string_and_guid(content, url) if gemini_guid: # Create a new HarvestObject for this identifier @@ -669,13 +667,12 @@ def gather_stage(self,harvest_job): self._save_gather_error('Could not get the GUID for source %s' % url, harvest_job) return None except Exception, e: - self._save_gather_error('Error parsing the document. Is this a valid Gemini document?: %s [%r]'% (url,e),harvest_job) + self._save_gather_error('Error parsing the document. 
Is this a valid Gemini document?: %s [%r]' % (url, e), harvest_job) if debug_exception_mode: raise return None - - def fetch_stage(self,harvest_object): + def fetch_stage(self, harvest_object): # The fetching was already done in the previous stage return True @@ -695,7 +692,7 @@ def info(self): 'description': 'A Web Accessible Folder (WAF) displaying a list of GEMINI 2.1 documents' } - def gather_stage(self,harvest_job): + def gather_stage(self, harvest_job): log = logging.getLogger(__name__ + '.WAF.gather') log.debug('GeminiWafHarvester gather_stage for job: %r', harvest_job) @@ -707,23 +704,23 @@ def gather_stage(self,harvest_job): # Get contents try: content = self._get_content(url) - except Exception,e: - self._save_gather_error('Unable to get content for URL: %s: %r' % \ - (url, e),harvest_job) + except Exception, e: + self._save_gather_error('Unable to get content for URL: %s: %r' % + (url, e), harvest_job) return None ids = [] try: - for url in self._extract_urls(content,url): + for url in self._extract_urls(content, url): try: content = self._get_content(url) except Exception, e: msg = 'Couldn\'t harvest WAF link: %s: %s' % (url, e) - self._save_gather_error(msg,harvest_job) + self._save_gather_error(msg, harvest_job) continue else: # We need to extract the guid to pass it to the next stage try: - gemini_string, gemini_guid = self.get_gemini_string_and_guid(content,url) + gemini_string, gemini_guid = self.get_gemini_string_and_guid(content, url) if gemini_guid: log.debug('Got GUID %s' % gemini_guid) # Create a new HarvestObject for this identifier @@ -736,28 +733,25 @@ def gather_stage(self,harvest_job): ids.append(obj.id) - - except Exception,e: - msg = 'Could not get GUID for source %s: %r' % (url,e) - self._save_gather_error(msg,harvest_job) + except Exception, e: + msg = 'Could not get GUID for source %s: %r' % (url, e) + self._save_gather_error(msg, harvest_job) continue - except Exception,e: + except Exception, e: msg = 'Error extracting URLs from %s' % url - self._save_gather_error(msg,harvest_job) + self._save_gather_error(msg, harvest_job) return None if len(ids) > 0: return ids else: - self._save_gather_error('Couldn\'t find any links to metadata files', - harvest_job) + self._save_gather_error('Couldn\'t find any links to metadata files', harvest_job) return None - def fetch_stage(self,harvest_object): + def fetch_stage(self, harvest_object): # The fetching was already done in the previous stage return True - def _extract_urls(self, content, base_url): ''' Get the URLs out of a WAF index page @@ -795,5 +789,3 @@ def _extract_urls(self, content, base_url): base_url += '/' log.debug('WAF base URL: %s', base_url) return [base_url + i for i in urls] - - diff --git a/ckanext/spatial/plugin.py b/ckanext/spatial/plugin.py index e2d73df3..19015d97 100644 --- a/ckanext/spatial/plugin.py +++ b/ckanext/spatial/plugin.py @@ -33,6 +33,7 @@ def check_geoalchemy_requirement(): except ImportError: raise ImportError(msg.format('geoalchemy')) + check_geoalchemy_requirement() log = getLogger(__name__) @@ -59,6 +60,7 @@ def prettify(field_name): summary[p.toolkit._(prettify(key))] = error[0] return summary + class SpatialMetadata(p.SingletonPlugin): p.implements(p.IPackageController, inherit=True) @@ -92,7 +94,7 @@ def create(self, package): def edit(self, package): self.check_spatial_extra(package) - def check_spatial_extra(self,package): + def check_spatial_extra(self, package): ''' For a given package, looks at the spatial extent (as given in the extra "spatial" in GeoJSON format) and 
records it in PostGIS. @@ -110,46 +112,46 @@ def check_spatial_extra(self,package): try: log.debug('Received: %r' % extra.value) geometry = json.loads(extra.value) - except ValueError,e: - error_dict = {'spatial':[u'Error decoding JSON object: %s' % str(e)]} + except ValueError, e: + error_dict = {'spatial': [u'Error decoding JSON object: %s' % str(e)]} raise p.toolkit.ValidationError(error_dict, error_summary=package_error_summary(error_dict)) - except TypeError,e: - error_dict = {'spatial':[u'Error decoding JSON object: %s' % str(e)]} + except TypeError, e: + error_dict = {'spatial': [u'Error decoding JSON object: %s' % str(e)]} raise p.toolkit.ValidationError(error_dict, error_summary=package_error_summary(error_dict)) try: - save_package_extent(package.id,geometry) + save_package_extent(package.id, geometry) - except ValueError,e: - error_dict = {'spatial':[u'Error creating geometry: %s' % str(e)]} + except ValueError, e: + error_dict = {'spatial': [u'Error creating geometry: %s' % str(e)]} raise p.toolkit.ValidationError(error_dict, error_summary=package_error_summary(error_dict)) except Exception, e: if bool(os.getenv('DEBUG')): raise - error_dict = {'spatial':[u'Error: %s' % str(e)]} + error_dict = {'spatial': [u'Error: %s' % str(e)]} raise p.toolkit.ValidationError(error_dict, error_summary=package_error_summary(error_dict)) elif (extra.state == 'active' and not extra.value) or extra.state == 'deleted': # Delete extent from table - save_package_extent(package.id,None) + save_package_extent(package.id, None) break - def delete(self, package): from ckanext.spatial.lib import save_package_extent - save_package_extent(package.id,None) + save_package_extent(package.id, None) - ## ITemplateHelpers + # ITemplateHelpers def get_helpers(self): from ckanext.spatial import helpers as spatial_helpers return { - 'get_reference_date' : spatial_helpers.get_reference_date, + 'get_reference_date': spatial_helpers.get_reference_date, 'get_responsible_party': spatial_helpers.get_responsible_party, - 'get_common_map_config' : spatial_helpers.get_common_map_config, + 'get_common_map_config': spatial_helpers.get_common_map_config, } + class SpatialQuery(p.SingletonPlugin): p.implements(p.IRoutes, inherit=True) @@ -169,8 +171,8 @@ def configure(self, config): def before_map(self, map): map.connect('api_spatial_query', '/api/2/search/{register:dataset|package}/geo', - controller='ckanext.spatial.controllers.api:ApiController', - action='spatial_query') + controller='ckanext.spatial.controllers.api:ApiController', + action='spatial_query') return map def before_index(self, pkg_dict): @@ -189,7 +191,8 @@ def before_index(self, pkg_dict): if not (geometry['type'] == 'Polygon' and len(geometry['coordinates']) == 1 and len(geometry['coordinates'][0]) == 5): - log.error('Solr backend only supports bboxes (Polygons with 5 points), ignoring geometry {0}'.format(pkg_dict['extras_spatial'])) + log.error('Solr backend only supports bboxes (Polygons with 5 points), ignoring geometry {0}' + .format(pkg_dict['extras_spatial'])) return pkg_dict coords = geometry['coordinates'] @@ -232,11 +235,10 @@ def before_index(self, pkg_dict): pkg_dict['spatial_geom'] = wkt - return pkg_dict def before_search(self, search_params): - from ckanext.spatial.lib import validate_bbox + from ckanext.spatial.lib import validate_bbox from ckan.lib.search import SearchError if search_params.get('extras', None) and search_params['extras'].get('ext_bbox', None): @@ -282,7 +284,7 @@ def _params_for_solr_search(self, bbox, search_params): ''' - 
variables =dict( + variables = dict( x11=bbox['minx'], x12=bbox['maxx'], y11=bbox['miny'], @@ -291,7 +293,7 @@ def _params_for_solr_search(self, bbox, search_params): x22='maxx', y21='miny', y22='maxy', - area_search = abs(bbox['maxx'] - bbox['minx']) * abs(bbox['maxy'] - bbox['miny']) + area_search=abs(bbox['maxx'] - bbox['minx']) * abs(bbox['maxy'] - bbox['miny']) ) bf = '''div( @@ -301,7 +303,7 @@ def _params_for_solr_search(self, bbox, search_params): ), 2), add({area_search}, mul(sub({y22}, {y21}), sub({x22}, {x21}))) - )'''.format(**variables).replace('\n','').replace(' ','') + )'''.format(**variables).replace('\n', '').replace(' ', '') search_params['fq_list'] = ['{!frange incl=false l=0 u=1}%s' % bf] @@ -324,7 +326,7 @@ def _params_for_solr_spatial_field_search(self, bbox, search_params): return search_params def _params_for_postgis_search(self, bbox, search_params): - from ckanext.spatial.lib import bbox_query, bbox_query_ordered + from ckanext.spatial.lib import bbox_query, bbox_query_ordered from ckan.lib.search import SearchError # Note: This will be deprecated at some point in favour of the @@ -344,13 +346,13 @@ def _params_for_postgis_search(self, bbox, search_params): # they are in the wrong order anyway. We just need this SOLR # query to get the count and facet counts. rows = 0 - search_params['sort'] = None # SOLR should not sort. + search_params['sort'] = None # SOLR should not sort. # Store the rankings of the results for this page, so for # after_search to construct the correctly sorted results rows = search_params['extras']['ext_rows'] = search_params['rows'] start = search_params['extras']['ext_start'] = search_params['start'] search_params['extras']['ext_spatial'] = [ - (extent.package_id, extent.spatial_ranking) \ + (extent.package_id, extent.spatial_ranking) for extent in extents[start:start+rows]] else: extents = bbox_query(bbox) @@ -364,7 +366,7 @@ def _params_for_postgis_search(self, bbox, search_params): # of datasets within the bbox bbox_query_ids = [extent.package_id for extent in extents] - q = search_params.get('q','').strip() or '""' + q = search_params.get('q', '').strip() or '""' new_q = '%s AND ' % q if q else '' new_q += '(%s)' % ' OR '.join(['id:%s' % id for id in bbox_query_ids]) @@ -390,6 +392,7 @@ def after_search(self, search_results, search_params): search_results['results'] = pkgs return search_results + class HarvestMetadataApi(p.SingletonPlugin): ''' Harvest Metadata API @@ -406,8 +409,8 @@ def before_map(self, route_map): # Showing the harvest object content is an action of the default # harvest plugin, so just redirect there route_map.redirect('/api/2/rest/harvestobject/{id:.*}/xml', - '/harvest/object/{id}', - _redirect_code='301 Moved Permanently') + '/harvest/object/{id}', + _redirect_code='301 Moved Permanently') route_map.connect('/harvest/object/{id}/original', controller=controller, action='display_xml_original') @@ -419,8 +422,8 @@ def before_map(self, route_map): # Redirect old URL to a nicer and unversioned one route_map.redirect('/api/2/rest/harvestobject/:id/html', - '/harvest/object/{id}/html', - _redirect_code='301 Moved Permanently') + '/harvest/object/{id}/html', + _redirect_code='301 Moved Permanently') return route_map From 0db757e85940d3e2ca6dfe301f294db56862e69d Mon Sep 17 00:00:00 2001 From: Teemu Leivo Date: Wed, 27 Feb 2019 12:44:49 +0200 Subject: [PATCH 04/12] All clear changes made --- bin/ckan_pycsw.py | 48 +++++++++++++--------------- ckanext/spatial/geoalchemy_common.py | 2 -- ckanext/spatial/harvesters/base.py | 
19 +++++------ ckanext/spatial/harvesters/csw.py | 7 ++-- ckanext/spatial/harvesters/waf.py | 2 +- ckanext/spatial/helpers.py | 4 ++- ckanext/spatial/interfaces.py | 1 - setup.py | 42 ++++++++++++------------ 8 files changed, 60 insertions(+), 65 deletions(-) diff --git a/bin/ckan_pycsw.py b/bin/ckan_pycsw.py index 471c71c6..1c68b79c 100644 --- a/bin/ckan_pycsw.py +++ b/bin/ckan_pycsw.py @@ -10,10 +10,15 @@ import pycsw.config import pycsw.admin +import os +import argparse +from ConfigParser import SafeConfigParser + logging.basicConfig(format='%(message)s', level=logging.INFO) log = logging.getLogger(__name__) + def setup_db(pycsw_config): """Setup database tables and indexes""" @@ -28,9 +33,9 @@ def setup_db(pycsw_config): ] pycsw.admin.setup_db(database, - table_name, '', - create_plpythonu_functions=False, - extra_columns=ckan_columns) + table_name, '', + create_plpythonu_functions=False, + extra_columns=ckan_columns) def set_keywords(pycsw_config_file, pycsw_config, ckan_url, limit=20): @@ -63,7 +68,8 @@ def load(pycsw_config, ckan_url): log.info('Started gathering CKAN datasets identifiers: {0}'.format(str(datetime.datetime.now()))) - query = 'api/search/dataset?qjson={"fl":"id,metadata_modified,extras_harvest_object_id,extras_metadata_source", "q":"harvest_object_id:[\\"\\" TO *]", "limit":1000, "start":%s}' + query = 'api/search/dataset?qjson={"fl":"id,metadata_modified,extras_harvest_object_id,' \ + 'extras_metadata_source", "q":"harvest_object_id:[\\"\\" TO *]", "limit":1000, "start":%s}' start = 0 @@ -111,8 +117,7 @@ def load(pycsw_config, ckan_url): for ckan_id in deleted: try: repo.session.begin() - repo.session.query(repo.dataset.ckan_id).filter_by( - ckan_id=ckan_id).delete() + repo.session.query(repo.dataset.ckan_id).filter_by(ckan_id=ckan_id).delete() log.info('Deleted %s' % ckan_id) repo.session.commit() except Exception, err: @@ -137,12 +142,11 @@ def load(pycsw_config, ckan_url): if not record: continue update_dict = dict([(getattr(repo.dataset, key), - getattr(record, key)) \ - for key in record.__dict__.keys() if key != '_sa_instance_state']) + getattr(record, key)) + for key in record.__dict__.keys() if key != '_sa_instance_state']) try: repo.session.begin() - repo.session.query(repo.dataset).filter_by( - ckan_id=ckan_id).update(update_dict) + repo.session.query(repo.dataset).filter_by(ckan_id=ckan_id).update(update_dict) repo.session.commit() log.info('Changed %s' % ckan_id) except Exception, err: @@ -192,7 +196,7 @@ def get_record(context, repo, ckan_url, ckan_id, ckan_info): return record -usage=''' +usage = ''' Manages the CKAN-pycsw integration python ckan-pycsw.py setup [-p] @@ -219,6 +223,7 @@ def get_record(context, repo, ckan_url, ckan_id, ckan_info): ''' + def _load_config(file_path): abs_path = os.path.abspath(file_path) if not os.path.exists(abs_path): @@ -230,25 +235,18 @@ def _load_config(file_path): return config - -import os -import argparse -from ConfigParser import SafeConfigParser - if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='\n'.split(usage)[0], - usage=usage) - parser.add_argument('command', - help='Command to perform') + parser = argparse.ArgumentParser(description='\n'.split(usage)[0], + usage=usage) + parser.add_argument('command', help='Command to perform') parser.add_argument('-p', '--pycsw_config', - action='store', default='default.cfg', - help='pycsw config file to use.') + action='store', default='default.cfg', + help='pycsw config file to use.') parser.add_argument('-u', '--ckan_url', - action='store', 
- help='CKAN instance to import the datasets from.') + action='store', + help='CKAN instance to import the datasets from.') if len(sys.argv) <= 1: parser.print_usage() diff --git a/ckanext/spatial/geoalchemy_common.py b/ckanext/spatial/geoalchemy_common.py index 308455de..6efef52c 100644 --- a/ckanext/spatial/geoalchemy_common.py +++ b/ckanext/spatial/geoalchemy_common.py @@ -13,7 +13,6 @@ if toolkit.check_ckan_version(min_version='2.3'): # CKAN >= 2.3, use GeoAlchemy2 - from geoalchemy2.elements import WKTElement from geoalchemy2 import Geometry from sqlalchemy import func ST_Transform = func.ST_Transform @@ -23,7 +22,6 @@ else: # CKAN < 2.3, use GeoAlchemy - from geoalchemy import WKTSpatialElement as WKTElement from geoalchemy import functions ST_Transform = functions.transform ST_Equals = functions.equals diff --git a/ckanext/spatial/harvesters/base.py b/ckanext/spatial/harvesters/base.py index 82a19033..55dd4389 100644 --- a/ckanext/spatial/harvesters/base.py +++ b/ckanext/spatial/harvesters/base.py @@ -606,7 +606,8 @@ def import_stage(self, harvest_object): elif status == 'change': # Check if the modified date is more recent - if not self.force_import and previous_object and harvest_object.metadata_modified_date <= previous_object.metadata_modified_date: + if not self.force_import and previous_object + and harvest_object.metadata_modified_date <= previous_object.metadata_modified_date: # Assign the previous job id to the new object to # avoid losing history @@ -619,14 +620,14 @@ def import_stage(self, harvest_object): # Reindex the corresponding package to update the reference to the # harvest object if ((config.get('ckanext.spatial.harvest.reindex_unchanged', True) != 'False' - or self.source_config.get('reindex_unchanged') != 'False') - and harvest_object.package_id): - context.update({'validate': False, 'ignore_auth': True}) - try: - package_dict = logic.get_action('package_show')(context, - {'id': harvest_object.package_id}) - except p.toolkit.ObjectNotFound: - pass + or self.source_config.get('reindex_unchanged') != 'False') + and harvest_object.package_id): + context.update({'validate': False, 'ignore_auth': True}) + try: + package_dict = logic.get_action('package_show')(context, + {'id': harvest_object.package_id}) + except p.toolkit.ObjectNotFound: + pass else: for extra in package_dict.get('extras', []): if extra['key'] == 'harvest_object_id': diff --git a/ckanext/spatial/harvesters/csw.py b/ckanext/spatial/harvesters/csw.py index a319146d..9bb246ec 100644 --- a/ckanext/spatial/harvesters/csw.py +++ b/ckanext/spatial/harvesters/csw.py @@ -22,7 +22,7 @@ class CSWHarvester(SpatialHarvester, SingletonPlugin): ''' implements(IHarvester) - csw=None + csw = None def info(self): return { @@ -31,10 +31,9 @@ def info(self): 'description': 'A server that implements OGC\'s Catalog Service for the Web (CSW) standard' } - def get_original_url(self, harvest_object_id): obj = model.Session.query(HarvestObject).\ - filter(HarvestObject.id==harvest_object_id).\ + filter(HarvestObject.id == harvest_object_id).\ first() parts = urlparse.urlparse(obj.source.url) @@ -44,7 +43,7 @@ def get_original_url(self, harvest_object_id): 'VERSION': '2.0.2', 'REQUEST': 'GetRecordById', 'OUTPUTSCHEMA': 'http://www.isotc211.org/2005/gmd', - 'OUTPUTFORMAT':'application/xml' , + 'OUTPUTFORMAT': 'application/xml', 'ID': obj.guid } diff --git a/ckanext/spatial/harvesters/waf.py b/ckanext/spatial/harvesters/waf.py index e9d9d7e3..e26e6b55 100644 --- a/ckanext/spatial/harvesters/waf.py +++ 
b/ckanext/spatial/harvesters/waf.py @@ -108,7 +108,7 @@ def gather_stage(self, harvest_job, collection_package_id=None): for item in possible_changes: if (not url_to_modified_harvest[item] or not url_to_modified_db[item] # if there is no date assume change or url_to_modified_harvest[item] > url_to_modified_db[item]): - change.append(item) + change.append(item) def create_extras(url, date, status): extras = [HOExtra(key='waf_modified_date', value=date), diff --git a/ckanext/spatial/helpers.py b/ckanext/spatial/helpers.py index 7b205d23..e0ffc9c5 100644 --- a/ckanext/spatial/helpers.py +++ b/ckanext/spatial/helpers.py @@ -39,7 +39,9 @@ def get_responsible_party(value): Examples: [{"name": "Complex Systems Research Center", "roles": ["pointOfContact"]}] - [{"name": "British Geological Survey", "roles": ["custodian", "pointOfContact"]}, {"name": "Natural England", "roles": ["publisher"]}] + + [{"name": "British Geological Survey", "roles": ["custodian", "pointOfContact"]}, + {"name": "Natural England", "roles": ["publisher"]}] Results Complex Systems Research Center (pointOfContact) diff --git a/ckanext/spatial/interfaces.py b/ckanext/spatial/interfaces.py index 7695e8c4..67de1753 100644 --- a/ckanext/spatial/interfaces.py +++ b/ckanext/spatial/interfaces.py @@ -88,4 +88,3 @@ def transform_to_iso(self, original_document, original_format, harvest_object): ''' return None - diff --git a/setup.py b/setup.py index 294d1445..45cc20ef 100644 --- a/setup.py +++ b/setup.py @@ -1,29 +1,27 @@ from setuptools import setup, find_packages -import sys, os version = '0.2' setup( - name='ckanext-spatial', - version=version, - description="Geo-related plugins for CKAN", - long_description="""\ - """, - classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers - keywords='', - author='Open Knowledge Foundation', - author_email='info@okfn.org', - url='http://okfn.org', - license='AGPL', - packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), - namespace_packages=['ckanext'], - include_package_data=True, - zip_safe=False, - install_requires=[ - # -*- Extra requirements: -*- - ], - entry_points=\ - """ + name='ckanext-spatial', + version=version, + description="Geo-related plugins for CKAN", + long_description="""\ + """, + classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers + keywords='', + author='Open Knowledge Foundation', + author_email='info@okfn.org', + url='http://okfn.org', + license='AGPL', + packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), + namespace_packages=['ckanext'], + include_package_data=True, + zip_safe=False, + install_requires=[ + # -*- Extra requirements: -*- + ], + entry_points=""" [ckan.plugins] spatial_metadata=ckanext.spatial.plugin:SpatialMetadata spatial_query=ckanext.spatial.plugin:SpatialQuery @@ -46,5 +44,5 @@ [ckan.test_plugins] test_spatial_plugin = ckanext.spatial.tests.test_plugin.plugin:TestSpatialPlugin - """, + """, ) From 3a4c7327b76f5c5050b8381515e24d67b3c93bce Mon Sep 17 00:00:00 2001 From: Jari Voutilainen Date: Thu, 28 Feb 2019 11:45:00 +0200 Subject: [PATCH 05/12] Add flake8 job to travis --- .travis.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.travis.yml b/.travis.yml index 6db595f8..3ff6b014 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,3 +24,23 @@ branches: except: - stable - release-v2.0 + + +stages: + - Flake8 + - test + +jobs: + include: + - stage: Flake8 + env: Flake8=True + install: + - bash bin/travis-build.bash + - pip 
install flake8==3.5.0 - pip install pycodestyle==2.3.0 script: - flake8 --version # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude ckan # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --max-line-length=127 --statistics --exclude ckan From ab699fa8e60c7f15eed1b13d10a2a8e1ce0beb4f Mon Sep 17 00:00:00 2001 From: Jari Voutilainen Date: Thu, 28 Feb 2019 12:51:12 +0200 Subject: [PATCH 06/12] Apply flake8 style --- bin/ckan_pycsw.py | 4 +- ckanext/spatial/controllers/api.py | 4 +- ckanext/spatial/harvesters/base.py | 21 ++- ckanext/spatial/harvesters/csw.py | 7 +- ckanext/spatial/harvesters/doc.py | 7 +- ckanext/spatial/harvesters/gemini.py | 23 ++- ckanext/spatial/harvesters/waf.py | 8 +- ckanext/spatial/helpers.py | 3 +- ckanext/spatial/lib/__init__.py | 6 +- ckanext/spatial/model/harvested_metadata.py | 177 ++++++++++++------ ckanext/spatial/plugin.py | 6 +- ckanext/spatial/tests/base.py | 7 +- .../tests/model/test_harvested_metadata.py | 3 +- ckanext/spatial/tests/test_harvest.py | 92 +++++---- ckanext/spatial/tests/test_validation.py | 13 +- ckanext/spatial/validation/validation.py | 2 +- 16 files changed, 236 insertions(+), 147 deletions(-) diff --git a/bin/ckan_pycsw.py b/bin/ckan_pycsw.py index 1c68b79c..80e61626 100644 --- a/bin/ckan_pycsw.py +++ b/bin/ckan_pycsw.py @@ -81,7 +81,7 @@ def load(pycsw_config, ckan_url): response = requests.get(url) listing = response.json() if not isinstance(listing, dict): - raise RuntimeError, 'Wrong API response: %s' % listing + raise RuntimeError('Wrong API response: %s' % listing) results = listing.get('results') if not results: break @@ -151,7 +151,7 @@ def load(pycsw_config, ckan_url): log.info('Changed %s' % ckan_id) except Exception, err: repo.session.rollback() - raise RuntimeError, 'ERROR: %s' % str(err) + raise RuntimeError('ERROR: %s' % str(err)) def clear(pycsw_config): diff --git a/ckanext/spatial/controllers/api.py b/ckanext/spatial/controllers/api.py index b0cb961b..b4daf6cd 100644 --- a/ckanext/spatial/controllers/api.py +++ b/ckanext/spatial/controllers/api.py @@ -26,7 +26,7 @@ def spatial_query(self): error_400_msg = \ 'Please provide a suitable bbox parameter [minx,miny,maxx,maxy]' - if not 'bbox' in request.params: + if 'bbox' not in request.params: abort(400, error_400_msg) bbox = validate_bbox(request.params['bbox']) @@ -127,7 +127,7 @@ def display_xml_original(self, id): response.headers['Content-Type'] = 'application/xml; charset=utf-8' response.headers['Content-Length'] = len(content) - if not '<?xml' in content: + if '<?xml' not in content: content = u'<?xml version="1.0" encoding="UTF-8"?>\n' + content return content.encode('utf-8') diff --git a/ckanext/spatial/harvesters/base.py b/ckanext/spatial/harvesters/base.py index 55dd4389..ec112f66 100644 --- a/ckanext/spatial/harvesters/base.py +++ b/ckanext/spatial/harvesters/base.py @@ -234,7 +234,8 @@ def get_package_dict(self, context, data_dict): if not name: name = self._gen_new_name(str(iso_values['guid'])) if not name: - raise Exception('Could not generate a unique name from the title or the GUID. 
' + 'Please choose a more unique title.') package_dict['name'] = name else: package_dict['name'] = package.name @@ -397,7 +398,7 @@ def _extract_first_license_url(licences): override_extras = self.source_config.get('override_extras', False) for key, value in default_extras.iteritems(): log.debug('Processing extra %s', key) - if not key in extras or override_extras: + if key not in extras or override_extras: # Look for replacement strings if isinstance(value, basestring): value = value.format(harvest_source_id=harvest_object.job.source.id, @@ -450,8 +451,7 @@ def import_stage(self, harvest_object): # Get the last harvested object (if any) previous_object = model.Session.query(HarvestObject) \ .filter(HarvestObject.guid == harvest_object.guid) \ - .filter(HarvestObject.current == True) \ - .first() + .filter(HarvestObject.current == True).first() # noqa if status == 'delete': # Delete package @@ -492,7 +492,8 @@ def import_stage(self, harvest_object): if not is_valid: # If validation errors were found, import will stop unless # configuration per source or per instance says otherwise - continue_import = p.toolkit.asbool(config.get('ckanext.spatial.harvest.continue_on_validation_errors', False)) or \ + continue_import = p.toolkit.asbool(config.get('ckanext.spatial.harvest.continue_on_validation_errors', + False)) or \ self.source_config.get('continue_on_validation_errors') if not continue_import: return False @@ -519,8 +520,7 @@ def import_stage(self, harvest_object): # with the same guid existing_object = model.Session.query(HarvestObject.id) \ .filter(HarvestObject.guid == iso_guid) \ - .filter(HarvestObject.current == True) \ - .first() + .filter(HarvestObject.current == True).first() # noqa if existing_object: self._save_object_error('Object {0} already has this guid {1}'.format(existing_object.id, iso_guid), harvest_object, 'Import') @@ -606,8 +606,8 @@ def import_stage(self, harvest_object): elif status == 'change': # Check if the modified date is more recent - if not self.force_import and previous_object - and harvest_object.metadata_modified_date <= previous_object.metadata_modified_date: + if not self.force_import and previous_object \ + and harvest_object.metadata_modified_date <= previous_object.metadata_modified_date: # Assign the previous job id to the new object to # avoid losing history @@ -815,7 +815,8 @@ def _validate_document(self, document_string, harvest_object, validator=None): valid, profile, errors = validator.is_valid(xml) if not valid: - log.error('Validation errors found using profile {0} for object with GUID {1}'.format(profile, harvest_object.guid)) + log.error('Validation errors found using profile {0} for object with GUID {1}' + .format(profile, harvest_object.guid)) for error in errors: self._save_object_error(error[0], harvest_object, 'Validation', line=error[1]) diff --git a/ckanext/spatial/harvesters/csw.py b/ckanext/spatial/harvesters/csw.py index 9bb246ec..f82f521c 100644 --- a/ckanext/spatial/harvesters/csw.py +++ b/ckanext/spatial/harvesters/csw.py @@ -75,9 +75,10 @@ def gather_stage(self, harvest_job): self._save_gather_error('Error contacting the CSW server: %s' % e, harvest_job) return None - query = model.Session.query(HarvestObject.guid, HarvestObject.package_id).\ - filter(HarvestObject.current == True).\ - filter(HarvestObject.harvest_source_id == harvest_job.source.id) + query = model.Session.query(HarvestObject.guid, HarvestObject.package_id). 
\ + filter(HarvestObject.harvest_source_id == harvest_job.source.id).\ + filter(HarvestObject.current == True) # noqa + guid_to_package_id = {} for guid, package_id in query: diff --git a/ckanext/spatial/harvesters/doc.py b/ckanext/spatial/harvesters/doc.py index 3b71893c..1b935343 100644 --- a/ckanext/spatial/harvesters/doc.py +++ b/ckanext/spatial/harvesters/doc.py @@ -55,10 +55,9 @@ def gather_stage(self, harvest_job): (url, e), harvest_job) return None - existing_object = model.Session.query(HarvestObject.guid, HarvestObject.package_id).\ - filter(HarvestObject.current == True).\ - filter(HarvestObject.harvest_source_id == harvest_job.source.id).\ - first() + existing_object = model.Session.query(HarvestObject.guid, HarvestObject.package_id). \ + filter(HarvestObject.harvest_source_id == harvest_job.source.id). \ + filter(HarvestObject.current == True).first() # noqa def create_extras(url, status): return [HOExtra(key='doc_location', value=url), diff --git a/ckanext/spatial/harvesters/gemini.py b/ckanext/spatial/harvesters/gemini.py index 0ed9ba36..66f278cb 100644 --- a/ckanext/spatial/harvesters/gemini.py +++ b/ckanext/spatial/harvesters/gemini.py @@ -120,7 +120,7 @@ def write_package_from_gemini_string(self, content): except ValueError: try: metadata_modified_date = datetime.strptime(gemini_values['metadata-date'], '%Y-%m-%dT%H:%M:%S') - except: + except Exception: raise Exception('Could not extract reference date for GUID %s (%s)' % (gemini_guid, gemini_values['metadata-date'])) @@ -129,8 +129,7 @@ def write_package_from_gemini_string(self, content): last_harvested_object = Session.query(HarvestObject) \ .filter(HarvestObject.guid == gemini_guid) \ - .filter(HarvestObject.current == True) \ - .all() + .filter(HarvestObject.current == True).all() # noqa if len(last_harvested_object) == 1: last_harvested_object = last_harvested_object[0] @@ -163,7 +162,8 @@ def write_package_from_gemini_string(self, content): log.info('Package for object with GUID %s will be re-activated' % gemini_guid) reactivate_package = True else: - log.info('Remote record with GUID %s is not more recent than a deleted package, skipping... ' % gemini_guid) + log.info('Remote record with GUID %s is not more recent than a deleted package, skipping... ' + % gemini_guid) return None else: @@ -222,10 +222,10 @@ def write_package_from_gemini_string(self, content): extras['licence_url'] = licence_url_extracted extras['access_constraints'] = gemini_values.get('limitations-on-public-access', '') - if gemini_values.has_key('temporal-extent-begin'): + if 'temporal-extent-begin' in gemini_values: # gemini_values['temporal-extent-begin'].sort() extras['temporal_coverage-from'] = gemini_values['temporal-extent-begin'] - if gemini_values.has_key('temporal-extent-end'): + if 'temporal-extent-end' in gemini_values: # gemini_values['temporal-extent-end'].sort() extras['temporal_coverage-to'] = gemini_values['temporal-extent-end'] @@ -274,7 +274,8 @@ def write_package_from_gemini_string(self, content): if not name: name = self.gen_new_name(str(gemini_guid)) if not name: - raise Exception('Could not generate a unique name from the title or the GUID. Please choose a more unique title.') + raise Exception('Could not generate a unique name from the title or the GUID. 
' + 'Please choose a more unique title.') package_dict['name'] = name else: package_dict['name'] = package.name @@ -324,7 +325,7 @@ def write_package_from_gemini_string(self, content): package_dict['extras'] = extras_as_dict - if package == None: + if package is None: # Create new package from data. package = self._create_package_from_data(package_dict) log.info('Created new package ID %s with GEMINI guid %s', package['id'], gemini_guid) @@ -503,7 +504,8 @@ def get_gemini_string_and_guid(self, content, url=None): gemini_xml = xml.find(metadata_tag) if gemini_xml is None: - self._save_gather_error('Content is not a valid Gemini document without the gmd:MD_Metadata element', self.harvest_job) + self._save_gather_error('Content is not a valid Gemini document without the gmd:MD_Metadata element', + self.harvest_job) gemini_string = etree.tostring(gemini_xml) gemini_document = GeminiDocument(gemini_string) @@ -667,7 +669,8 @@ def gather_stage(self, harvest_job): self._save_gather_error('Could not get the GUID for source %s' % url, harvest_job) return None except Exception, e: - self._save_gather_error('Error parsing the document. Is this a valid Gemini document?: %s [%r]' % (url, e), harvest_job) + self._save_gather_error('Error parsing the document. Is this a valid Gemini document?: %s [%r]' + % (url, e), harvest_job) if debug_exception_mode: raise return None diff --git a/ckanext/spatial/harvesters/waf.py b/ckanext/spatial/harvesters/waf.py index e26e6b55..e6f7ef84 100644 --- a/ckanext/spatial/harvesters/waf.py +++ b/ckanext/spatial/harvesters/waf.py @@ -76,9 +76,9 @@ def gather_stage(self, harvest_job, collection_package_id=None): join(HOExtraAlias1, HarvestObject.extras).\ join(HOExtraAlias2, HarvestObject.extras).\ filter(HOExtraAlias1.key == 'waf_modified_date').\ - filter(HOExtraAlias2.key == 'waf_location').\ - filter(HarvestObject.current == True).\ - filter(HarvestObject.harvest_source_id == harvest_job.source.id) + filter(HOExtraAlias2.key == 'waf_location'). 
\ + filter(HarvestObject.harvest_source_id == harvest_job.source.id).\ + filter(HarvestObject.current == True) # noqa for guid, package_id, modified_date, url in query: url_to_modified_db[url] = modified_date @@ -107,7 +107,7 @@ def gather_stage(self, harvest_job, collection_package_id=None): for item in possible_changes: if (not url_to_modified_harvest[item] or not url_to_modified_db[item] # if there is no date assume change - or url_to_modified_harvest[item] > url_to_modified_db[item]): + or url_to_modified_harvest[item] > url_to_modified_db[item]): change.append(item) def create_extras(url, date, status): diff --git a/ckanext/spatial/helpers.py b/ckanext/spatial/helpers.py index e0ffc9c5..6ce8ddfa 100644 --- a/ckanext/spatial/helpers.py +++ b/ckanext/spatial/helpers.py @@ -57,7 +57,8 @@ def get_responsible_party(value): out = [] parties = h.json.loads(value) for party in parties: - roles = [formatted[role] if role in formatted.keys() else p.toolkit._(role.capitalize()) for role in party['roles']] + roles = [formatted[role] if role in formatted.keys() + else p.toolkit._(role.capitalize()) for role in party['roles']] out.append('{0} ({1})'.format(party['name'], ', '.join(roles))) return '; '.join(out) except (ValueError, TypeError): diff --git a/ckanext/spatial/lib/__init__.py b/ckanext/spatial/lib/__init__.py index 9f7dd291..2d140e6b 100644 --- a/ckanext/spatial/lib/__init__.py +++ b/ckanext/spatial/lib/__init__.py @@ -109,7 +109,7 @@ def validate_bbox(bbox_values): bbox['miny'] = float(bbox_values[1]) bbox['maxx'] = float(bbox_values[2]) bbox['maxy'] = float(bbox_values[3]) - except ValueError, e: + except ValueError: return None return bbox @@ -180,7 +180,9 @@ def bbox_query_ordered(bbox, srid=None): # Uses spatial ranking method from "USGS - 2006-1279" (Lanfear) sql = """SELECT ST_AsBinary(package_extent.the_geom) AS package_extent_the_geom, - POWER(ST_Area(ST_Intersection(package_extent.the_geom, ST_GeomFromText(:query_bbox, :query_srid))),2)/ST_Area(package_extent.the_geom)/:search_area as spatial_ranking, + POWER(ST_Area(ST_Intersection(package_extent.the_geom, + ST_GeomFromText(:query_bbox, :query_srid))),2) + /ST_Area(package_extent.the_geom)/:search_area as spatial_ranking, package_extent.package_id AS package_id FROM package_extent, package WHERE package_extent.package_id = package.id diff --git a/ckanext/spatial/model/harvested_metadata.py b/ckanext/spatial/model/harvested_metadata.py index 305128b4..a7e43afb 100644 --- a/ckanext/spatial/model/harvested_metadata.py +++ b/ckanext/spatial/model/harvested_metadata.py @@ -67,7 +67,7 @@ def read_value(self, tree): return self.fix_multiplicity(values) def get_search_paths(self): - if type(self.search_paths) != type([]): + if not isinstance(self.search_paths, list): search_paths = [self.search_paths] else: search_paths = self.search_paths @@ -524,49 +524,62 @@ class ISODocument(MappedXmlDocument): ISOElement( name="spatial-reference-system", search_paths=[ - "gmd:referenceSystemInfo/gmd:MD_ReferenceSystem/gmd:referenceSystemIdentifier/gmd:RS_Identifier/gmd:code/gco:CharacterString/text()", + "gmd:referenceSystemInfo/gmd:MD_ReferenceSystem/gmd:referenceSystemIdentifier/" + "gmd:RS_Identifier/gmd:code/gco:CharacterString/text()", ], multiplicity="0..1", ), ISOElement( name="title", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:citation/gmd:CI_Citation/gmd:title/gco:CharacterString/text()", 
+ "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/" + "gmd:CI_Citation/gmd:title/gco:CharacterString/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:citation/" + "gmd:CI_Citation/gmd:title/gco:CharacterString/text()", ], multiplicity="1", ), ISOElement( name="alternate-title", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:alternateTitle/gco:CharacterString/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:citation/gmd:CI_Citation/gmd:alternateTitle/gco:CharacterString/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/" + "gmd:CI_Citation/gmd:alternateTitle/gco:CharacterString/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:citation/" + "gmd:CI_Citation/gmd:alternateTitle/gco:CharacterString/text()", ], multiplicity="*", ), ISOReferenceDate( name="dataset-reference-date", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date", + "gmd:identificationInfo/gmd:MD_DataIdentification/" + "gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date", + "gmd:identificationInfo/srv:SV_ServiceIdentification/" + "gmd:citation/gmd:CI_Citation/gmd:date/gmd:CI_Date", ], multiplicity="1..*", ), ISOElement( name="unique-resource-identifier", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:identifier/gmd:MD_Identifier/gmd:code/gco:CharacterString/text()", - "gmd:identificationInfo/gmd:SV_ServiceIdentification/gmd:citation/gmd:CI_Citation/gmd:identifier/gmd:MD_Identifier/gmd:code/gco:CharacterString/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/" + "gmd:CI_Citation/gmd:identifier/gmd:MD_Identifier/gmd:code/gco:CharacterString/text()", + "gmd:identificationInfo/gmd:SV_ServiceIdentification/gmd:citation/" + "gmd:CI_Citation/gmd:identifier/gmd:MD_Identifier/gmd:code/gco:CharacterString/text()", ], multiplicity="0..1", ), ISOElement( name="presentation-form", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:presentationForm/gmd:CI_PresentationFormCode/text()", - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/gmd:CI_Citation/gmd:presentationForm/gmd:CI_PresentationFormCode/@codeListValue", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:citation/gmd:CI_Citation/gmd:presentationForm/gmd:CI_PresentationFormCode/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:citation/gmd:CI_Citation/gmd:presentationForm/gmd:CI_PresentationFormCode/@codeListValue", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/" + "gmd:CI_Citation/gmd:presentationForm/gmd:CI_PresentationFormCode/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:citation/" + "gmd:CI_Citation/gmd:presentationForm/gmd:CI_PresentationFormCode/@codeListValue", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:citation/" + "gmd:CI_Citation/gmd:presentationForm/gmd:CI_PresentationFormCode/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:citation/" + "gmd:CI_Citation/gmd:presentationForm/gmd:CI_PresentationFormCode/@codeListValue", ], multiplicity="*", @@ -599,18 +612,27 @@ class ISODocument(MappedXmlDocument): ISOElement( name="frequency-of-update", search_paths=[ - 
"gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode/@codeListValue", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode/@codeListValue", - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceMaintenance/" + "gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/" + "gmd:MD_MaintenanceFrequencyCode/@codeListValue", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceMaintenance/" + "gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/" + "gmd:MD_MaintenanceFrequencyCode/@codeListValue", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceMaintenance/" + "gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/" + "gmd:MD_MaintenanceFrequencyCode/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceMaintenance/" + "gmd:MD_MaintenanceInformation/gmd:maintenanceAndUpdateFrequency/gmd:MD_MaintenanceFrequencyCode/text()", ], multiplicity="0..1", ), ISOElement( name="maintenance-note", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceNote/gco:CharacterString/text()", - "gmd:identificationInfo/gmd:SV_ServiceIdentification/gmd:resourceMaintenance/gmd:MD_MaintenanceInformation/gmd:maintenanceNote/gco:CharacterString/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceMaintenance/" + "gmd:MD_MaintenanceInformation/gmd:maintenanceNote/gco:CharacterString/text()", + "gmd:identificationInfo/gmd:SV_ServiceIdentification/gmd:resourceMaintenance/" + "gmd:MD_MaintenanceInformation/gmd:maintenanceNote/gco:CharacterString/text()", ], multiplicity="0..1", ), @@ -635,8 +657,10 @@ class ISODocument(MappedXmlDocument): ISOElement( name="keyword-inspire-theme", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:descriptiveKeywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:descriptiveKeywords/" + "gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:descriptiveKeywords/" + "gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()", ], multiplicity="*", ), @@ -644,7 +668,8 @@ class ISODocument(MappedXmlDocument): ISOElement( name="keyword-controlled-other", search_paths=[ - "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:keywords/gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:keywords/" + "gmd:MD_Keywords/gmd:keyword/gco:CharacterString/text()", ], multiplicity="*", ), @@ -659,18 +684,24 @@ class ISODocument(MappedXmlDocument): ISOElement( name="limitations-on-public-access", search_paths=[ - 
"gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:otherConstraints/gco:CharacterString/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:otherConstraints/gco:CharacterString/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceConstraints/" + "gmd:MD_LegalConstraints/gmd:otherConstraints/gco:CharacterString/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceConstraints/" + "gmd:MD_LegalConstraints/gmd:otherConstraints/gco:CharacterString/text()", ], multiplicity="*", ), ISOElement( name="access-constraints", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:accessConstraints/gmd:MD_RestrictionCode/@codeListValue", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:accessConstraints/gmd:MD_RestrictionCode/@codeListValue", - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:accessConstraints/gmd:MD_RestrictionCode/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceConstraints/gmd:MD_LegalConstraints/gmd:accessConstraints/gmd:MD_RestrictionCode/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceConstraints/" + "gmd:MD_LegalConstraints/gmd:accessConstraints/gmd:MD_RestrictionCode/@codeListValue", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceConstraints/" + "gmd:MD_LegalConstraints/gmd:accessConstraints/gmd:MD_RestrictionCode/@codeListValue", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceConstraints/" + "gmd:MD_LegalConstraints/gmd:accessConstraints/gmd:MD_RestrictionCode/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceConstraints/" + "gmd:MD_LegalConstraints/gmd:accessConstraints/gmd:MD_RestrictionCode/text()", ], multiplicity="*", ), @@ -678,8 +709,10 @@ class ISODocument(MappedXmlDocument): ISOElement( name="use-constraints", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceConstraints/gmd:MD_Constraints/gmd:useLimitation/gco:CharacterString/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceConstraints/gmd:MD_Constraints/gmd:useLimitation/gco:CharacterString/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:resourceConstraints/" + "gmd:MD_Constraints/gmd:useLimitation/gco:CharacterString/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:resourceConstraints/" + "gmd:MD_Constraints/gmd:useLimitation/gco:CharacterString/text()", ], multiplicity="*", ), @@ -701,24 +734,30 @@ class ISODocument(MappedXmlDocument): ISOElement( name="spatial-resolution", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:distance/gco:Distance/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:distance/gco:Distance/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/" + "gmd:MD_Resolution/gmd:distance/gco:Distance/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:spatialResolution/" + "gmd:MD_Resolution/gmd:distance/gco:Distance/text()", ], multiplicity="0..1", ), ISOElement( name="spatial-resolution-units", search_paths=[ - 
"gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:distance/gco:Distance/@uom", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:distance/gco:Distance/@uom", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/" + "gmd:MD_Resolution/gmd:distance/gco:Distance/@uom", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:spatialResolution/" + "gmd:MD_Resolution/gmd:distance/gco:Distance/@uom", ], multiplicity="0..1", ), ISOElement( name="equivalent-scale", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:equivalentScale/gmd:MD_RepresentativeFraction/gmd:denominator/gco:Integer/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:spatialResolution/gmd:MD_Resolution/gmd:equivalentScale/gmd:MD_RepresentativeFraction/gmd:denominator/gco:Integer/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialResolution/" + "gmd:MD_Resolution/gmd:equivalentScale/gmd:MD_RepresentativeFraction/gmd:denominator/gco:Integer/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/gmd:spatialResolution/" + "gmd:MD_Resolution/gmd:equivalentScale/gmd:MD_RepresentativeFraction/gmd:denominator/gco:Integer/text()", ], multiplicity="*", ), @@ -749,44 +788,60 @@ class ISODocument(MappedXmlDocument): ISOElement( name="extent-free-text", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicDescription/gmd:geographicIdentifier/gmd:MD_Identifier/gmd:code/gco:CharacterString/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicDescription/gmd:geographicIdentifier/gmd:MD_Identifier/gmd:code/gco:CharacterString/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/" + "gmd:geographicElement/gmd:EX_GeographicDescription/gmd:geographicIdentifier/" + "gmd:MD_Identifier/gmd:code/gco:CharacterString/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/" + "gmd:geographicElement/gmd:EX_GeographicDescription/gmd:geographicIdentifier/" + "gmd:MD_Identifier/gmd:code/gco:CharacterString/text()", ], multiplicity="*", ), ISOBoundingBox( name="bbox", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox", - "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/gmd:geographicElement/gmd:EX_GeographicBoundingBox", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/" + "gmd:geographicElement/gmd:EX_GeographicBoundingBox", + "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/" + "gmd:geographicElement/gmd:EX_GeographicBoundingBox", ], multiplicity="*", ), ISOElement( name="temporal-extent-begin", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:beginPosition/text()", - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml32:TimePeriod/gml32:beginPosition/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:beginPosition/text()", - 
"gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml32:TimePeriod/gml32:beginPosition/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/" + "gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:beginPosition/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/" + "gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml32:TimePeriod/gml32:beginPosition/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/" + "gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:beginPosition/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/" + "gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml32:TimePeriod/gml32:beginPosition/text()", ], multiplicity="*", ), ISOElement( name="temporal-extent-end", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:endPosition/text()", - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml32:TimePeriod/gml32:endPosition/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:endPosition/text()", - "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml32:TimePeriod/gml32:endPosition/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/" + "gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:endPosition/text()", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/" + "gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml32:TimePeriod/gml32:endPosition/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/" + "gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml:TimePeriod/gml:endPosition/text()", + "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/" + "gmd:temporalElement/gmd:EX_TemporalExtent/gmd:extent/gml32:TimePeriod/gml32:endPosition/text()", ], multiplicity="*", ), ISOElement( name="vertical-extent", search_paths=[ - "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/gmd:verticalElement/gmd:EX_VerticalExtent", - "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/gmd:verticalElement/gmd:EX_VerticalExtent", + "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:extent/gmd:EX_Extent/" + "gmd:verticalElement/gmd:EX_VerticalExtent", + "gmd:identificationInfo/srv:SV_ServiceIdentification/srv:extent/gmd:EX_Extent/" + "gmd:verticalElement/gmd:EX_VerticalExtent", ], multiplicity="*", ), @@ -814,15 +869,18 @@ class ISODocument(MappedXmlDocument): ISOResponsibleParty( name="distributor", search_paths=[ - "gmd:distributionInfo/gmd:MD_Distribution/gmd:distributor/gmd:MD_Distributor/gmd:distributorContact/gmd:CI_ResponsibleParty", + "gmd:distributionInfo/gmd:MD_Distribution/gmd:distributor/gmd:MD_Distributor/" + "gmd:distributorContact/gmd:CI_ResponsibleParty", ], multiplicity="*", ), ISOResourceLocator( name="resource-locator", search_paths=[ - 
"gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource", - "gmd:distributionInfo/gmd:MD_Distribution/gmd:distributor/gmd:MD_Distributor/gmd:distributorTransferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource" + "gmd:distributionInfo/gmd:MD_Distribution/gmd:transferOptions/" + "gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource", + "gmd:distributionInfo/gmd:MD_Distribution/gmd:distributor/gmd:MD_Distributor/" + "gmd:distributorTransferOptions/gmd:MD_DigitalTransferOptions/gmd:onLine/gmd:CI_OnlineResource" ], multiplicity="*", ), @@ -836,21 +894,24 @@ class ISODocument(MappedXmlDocument): ISOElement( name="conformity-specification", search_paths=[ - "gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:report/gmd:DQ_DomainConsistency/gmd:result/gmd:DQ_ConformanceResult/gmd:specification", + "gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:report/gmd:DQ_DomainConsistency/" + "gmd:result/gmd:DQ_ConformanceResult/gmd:specification", ], multiplicity="0..1", ), ISOElement( name="conformity-pass", search_paths=[ - "gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:report/gmd:DQ_DomainConsistency/gmd:result/gmd:DQ_ConformanceResult/gmd:pass/gco:Boolean/text()", + "gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:report/gmd:DQ_DomainConsistency/" + "gmd:result/gmd:DQ_ConformanceResult/gmd:pass/gco:Boolean/text()", ], multiplicity="0..1", ), ISOElement( name="conformity-explanation", search_paths=[ - "gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:report/gmd:DQ_DomainConsistency/gmd:result/gmd:DQ_ConformanceResult/gmd:explanation/gco:CharacterString/text()", + "gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:report/gmd:DQ_DomainConsistency/" + "gmd:result/gmd:DQ_ConformanceResult/gmd:explanation/gco:CharacterString/text()", ], multiplicity="0..1", ), @@ -954,7 +1015,7 @@ def infer_contact_email(self, values): for responsible_party in values['responsible-organisation']: if isinstance(responsible_party, dict) and \ isinstance(responsible_party.get('contact-info'), dict) and \ - responsible_party['contact-info'].has_key('email'): + 'email' in responsible_party['contact-info']: value = responsible_party['contact-info']['email'] if value: break diff --git a/ckanext/spatial/plugin.py b/ckanext/spatial/plugin.py index 19015d97..5e5af149 100644 --- a/ckanext/spatial/plugin.py +++ b/ckanext/spatial/plugin.py @@ -24,12 +24,12 @@ def check_geoalchemy_requirement(): if p.toolkit.check_ckan_version(min_version='2.3'): try: - import geoalchemy2 + import geoalchemy2 # noqa except ImportError: raise ImportError(msg.format('geoalchemy2')) else: try: - import geoalchemy + import geoalchemy # noqa except ImportError: raise ImportError(msg.format('geoalchemy')) @@ -182,7 +182,7 @@ def before_index(self, pkg_dict): if pkg_dict.get('extras_spatial', None) and self.search_backend in ('solr', 'solr-spatial-field'): try: geometry = json.loads(pkg_dict['extras_spatial']) - except ValueError, e: + except ValueError: log.error('Geometry not valid GeoJSON, not indexing') return pkg_dict diff --git a/ckanext/spatial/tests/base.py b/ckanext/spatial/tests/base.py index 61a7ec1e..3f7a84a2 100644 --- a/ckanext/spatial/tests/base.py +++ b/ckanext/spatial/tests/base.py @@ -14,10 +14,13 @@ 'point_2': '{"type":"Point","coordinates":[20,10]}', 'line': '{"type":"LineString","coordinates":[[100.0,0.0],[101.0,1.0]]}', 'polygon': '{"type":"Polygon","coordinates":[[[100.0,0.0],[101.0,0.0],[101.0,1.0],[100.0,1.0],[100.0,0.0]]]}', - 'polygon_holes': 
'{"type":"Polygon","coordinates":[[[100.0,0.0],[101.0,0.0],[101.0,1.0],[100.0,1.0],[100.0,0.0]],[[100.2,0.2],[100.8,0.2],[100.8,0.8],[100.2,0.8],[100.2,0.2]]]}', + 'polygon_holes': '{"type":"Polygon","coordinates":[[[100.0,0.0],[101.0,0.0],[101.0,1.0],[100.0,1.0],[100.0,0.0]],' + '[[100.2,0.2],[100.8,0.2],[100.8,0.8],[100.2,0.8],[100.2,0.2]]]}', 'multipoint': '{"type":"MultiPoint","coordinates":[[100.0,0.0],[101.0,1.0]]}', 'multiline': '{"type":"MultiLineString","coordinates":[[[100.0,0.0],[101.0,1.0]],[[102.0,2.0],[103.0,3.0]]]}', - 'multipolygon': '{"type":"MultiPolygon","coordinates":[[[[102.0,2.0],[103.0,2.0],[103.0,3.0],[102.0,3.0],[102.0,2.0]]],[[[100.0,0.0],[101.0,0.0],[101.0,1.0],[100.0,1.0],[100.0,0.0]],[[100.2,0.2],[100.8,0.2],[100.8,0.8],[100.2,0.8],[100.2,0.2]]]]}'} + 'multipolygon': '{"type":"MultiPolygon","coordinates":[[[[102.0,2.0],[103.0,2.0],[103.0,3.0],[102.0,3.0],' + '[102.0,2.0]]],[[[100.0,0.0],[101.0,0.0],[101.0,1.0],[100.0,1.0],[100.0,0.0]],' + '[[100.2,0.2],[100.8,0.2],[100.8,0.8],[100.2,0.8],[100.2,0.2]]]]}'} def _execute_script(script_path): diff --git a/ckanext/spatial/tests/model/test_harvested_metadata.py b/ckanext/spatial/tests/model/test_harvested_metadata.py index 05d8d6bc..1b7f1062 100644 --- a/ckanext/spatial/tests/model/test_harvested_metadata.py +++ b/ckanext/spatial/tests/model/test_harvested_metadata.py @@ -15,8 +15,7 @@ def open_xml_fixture(xml_filename): try: xml_string = xml_string_raw.encode("utf-8") except UnicodeDecodeError, e: - assert 0, 'ERROR: Unicode Error reading file \'%s\': %s' % \ - (metadata_filepath, e) + assert 0, 'ERROR: Unicode Error reading file: %s' % e return xml_string diff --git a/ckanext/spatial/tests/test_harvest.py b/ckanext/spatial/tests/test_harvest.py index db636af1..84dd8a4a 100644 --- a/ckanext/spatial/tests/test_harvest.py +++ b/ckanext/spatial/tests/test_harvest.py @@ -67,7 +67,7 @@ def _create_source_and_job(self, source_fixture): 'user': u'harvest'} if config.get('ckan.harvest.auth.profile') == u'publisher' \ - and not 'publisher_id' in source_fixture: + and 'publisher_id' not in source_fixture: source_fixture['publisher_id'] = self.publisher.id source_dict = get_action('harvest_source_create')(context, source_fixture) @@ -91,7 +91,7 @@ def _run_job_for_single_document(self, job, force_import=False, expect_gather_er else: assert len(job.gather_errors) == 0 - assert harvester.fetch_stage(object_ids) == True + assert harvester.fetch_stage(object_ids) is True obj = HarvestObject.get(object_ids[0]) assert obj, obj.content @@ -143,7 +143,7 @@ def test_harvest_basic(self): assert len(object_ids) == 2 # Fetch stage always returns True for Waf harvesters - assert harvester.fetch_stage(object_ids) == True + assert harvester.fetch_stage(object_ids) is True objects = [] for object_id in object_ids: @@ -159,7 +159,7 @@ def test_harvest_basic(self): pkg_ids = [pkg.id for pkg in pkgs] for obj in objects: - assert obj.current == True + assert obj.current is True assert obj.package_id in pkg_ids def test_harvest_fields_service(self): @@ -183,7 +183,7 @@ def test_harvest_fields_service(self): assert len(job.gather_errors) == 0 # Fetch stage always returns True for Single Doc harvesters - assert harvester.fetch_stage(object_ids) == True + assert harvester.fetch_stage(object_ids) is True obj = HarvestObject.get(object_ids[0]) assert obj, obj.content @@ -229,12 +229,18 @@ def test_harvest_fields_service(self): 'bbox-north-lat': u'61.0243', 'bbox-south-lat': u'54.4764484375', 'bbox-west-long': u'-9.099786875', - 'spatial': u'{"type": 
"Polygon", "coordinates": [[[0.5242365625, 54.4764484375], [-9.099786875, 54.4764484375], [-9.099786875, 61.0243], [0.5242365625, 61.0243], [0.5242365625, 54.4764484375]]]}', + 'spatial': u'{"type": "Polygon", "coordinates": [[[0.5242365625, 54.4764484375], [-9.099786875, 54.4764484375],' + u' [-9.099786875, 61.0243], [0.5242365625, 61.0243], [0.5242365625, 54.4764484375]]]}', # Other - 'coupled-resource': u'[{"href": ["http://scotgovsdi.edina.ac.uk/srv/en/csw?service=CSW&request=GetRecordById&version=2.0.2&outputSchema=http://www.isotc211.org/2005/gmd&elementSetName=full&id=250ea276-48e2-4189-8a89-fcc4ca92d652"], "uuid": ["250ea276-48e2-4189-8a89-fcc4ca92d652"], "title": []}]', + 'coupled-resource': u'[{"href": ["http://scotgovsdi.edina.ac.uk/srv/en/csw?service=CSW&' + u'request=GetRecordById&version=2.0.2&outputSchema=http://www.isotc211.org/2005/' + u'gmd&elementSetName=full&id=250ea276-48e2-4189-8a89-fcc4ca92d652"], ' + u'"uuid": ["250ea276-48e2-4189-8a89-fcc4ca92d652"], "title": []}]', 'dataset-reference-date': u'[{"type": "publication", "value": "2011-09-08"}]', 'frequency-of-update': u'daily', - 'licence': u'["Use of the One Scotland Gazetteer data used by this this service is available to any organisation that is a member of the One Scotland Mapping Agreement. It is not currently commercially available", "http://www.test.gov.uk/licenseurl"]', + 'licence': u'["Use of the One Scotland Gazetteer data used by this this service is available to any ' + u'organisation that is a member of the One Scotland Mapping Agreement. ' + u'It is not currently commercially available", "http://www.test.gov.uk/licenseurl"]', 'licence_url': u'http://www.test.gov.uk/licenseurl', 'metadata-date': u'2011-09-08T16:07:32', 'metadata-language': u'eng', @@ -292,7 +298,7 @@ def test_harvest_fields_dataset(self): assert len(job.gather_errors) == 0 # Fetch stage always returns True for Single Doc harvesters - assert harvester.fetch_stage(object_ids) == True + assert harvester.fetch_stage(object_ids) is True obj = HarvestObject.get(object_ids[0]) assert obj, obj.content @@ -311,7 +317,8 @@ def test_harvest_fields_dataset(self): 'name': u'country-parks-scotland', 'title': u'Country Parks (Scotland)', 'tags': [{u'name': u'Nature conservation'}], - 'notes': u'Parks are set up by Local Authorities to provide open-air recreation facilities close to towns and cities. [edited]' + 'notes': u'Parks are set up by Local Authorities to provide open-air recreation facilities ' + u'close to towns and cities. 
[edited]' } package_dict['tags'] = self.clean_tags(package_dict['tags']) @@ -341,7 +348,8 @@ def test_harvest_fields_dataset(self): u'[-8.97114288, 61.06066944], [0.205857204, 61.06066944], [0.205857204, 54.529947158]]]}', # Other 'coupled-resource': u'[]', - 'dataset-reference-date': u'[{"type": "creation", "value": "2004-02"}, {"type": "revision", "value": "2006-07-03"}]', + 'dataset-reference-date': u'[{"type": "creation", "value": "2004-02"}, ' + u'{"type": "revision", "value": "2006-07-03"}]', 'frequency-of-update': u'irregular', 'licence': u'["Reference and PSMA Only", "http://www.test.gov.uk/licenseurl"]', 'licence_url': u'http://www.test.gov.uk/licenseurl', @@ -446,7 +454,7 @@ def test_harvest_error_validation(self): assert len(job.gather_errors) == 0 # Fetch stage always returns True for Single Doc harvesters - assert harvester.fetch_stage(object_ids) == True + assert harvester.fetch_stage(object_ids) is True obj = HarvestObject.get(object_ids[0]) assert obj, obj.content @@ -484,7 +492,7 @@ def test_harvest_update_records(self): # Package was created assert first_package_dict - assert first_obj.current == True + assert first_obj.current is True assert first_obj.package # Create and run a second job, the package should not be updated @@ -504,7 +512,7 @@ def test_harvest_update_records(self): # Package was not updated assert second_package_dict, first_package_dict['id'] == second_package_dict['id'] assert not second_obj.package, not second_obj.package_id - assert second_obj.current == False, first_obj.current == True + assert second_obj.current is False, first_obj.current is True # Create and run a third job, forcing the importing to simulate an update in the package third_job = self._create_job(source.id) @@ -526,9 +534,9 @@ def test_harvest_update_records(self): # Package was updated assert third_package_dict, first_package_dict['id'] == third_package_dict['id'] assert third_obj.package, third_obj.package_id == first_package_dict['id'] - assert third_obj.current == True - assert second_obj.current == False - assert first_obj.current == False + assert third_obj.current is True + assert second_obj.current is False + assert first_obj.current is False def test_harvest_deleted_record(self): @@ -549,7 +557,7 @@ def test_harvest_deleted_record(self): # Package was created assert first_package_dict assert first_package_dict['state'] == u'active' - assert first_obj.current == True + assert first_obj.current is True # Delete package first_package_dict['state'] = u'deleted' @@ -569,7 +577,7 @@ def test_harvest_deleted_record(self): # Package was not updated assert second_package_dict, updated_package_dict['id'] == second_package_dict['id'] assert not second_obj.package, not second_obj.package_id - assert second_obj.current == False, first_obj.current == True + assert second_obj.current is False, first_obj.current is True # Harvest an updated document, with a more recent modified date, package should be # updated and reactivated @@ -594,8 +602,8 @@ def test_harvest_deleted_record(self): # Package was updated assert third_package_dict, third_package_dict['id'] == second_package_dict['id'] assert third_obj.package, third_obj.package - assert third_obj.current == True, second_obj.current == False - assert first_obj.current == False + assert third_obj.current is True, second_obj.current is False + assert first_obj.current is False assert 'NEWER' in third_package_dict['title'] assert third_package_dict['state'] == u'active' @@ -619,7 +627,7 @@ def 
test_harvest_different_sources_same_document(self): # Package was created assert first_package_dict assert first_package_dict['state'] == u'active' - assert first_obj.current == True + assert first_obj.current is True # Harvest the same document, unchanged, from another source, the package # is not updated. @@ -641,7 +649,7 @@ def test_harvest_different_sources_same_document(self): # Package was not updated assert second_package_dict, first_package_dict['id'] == second_package_dict['id'] assert not second_obj.package, not second_obj.package_id - assert second_obj.current == False, first_obj.current == True + assert second_obj.current is False, first_obj.current is True # Inactivate source1 and reharvest from source2, package should be updated third_job = self._create_job(source2.id) @@ -661,9 +669,9 @@ def test_harvest_different_sources_same_document(self): # Package was updated assert third_package_dict, first_package_dict['id'] == third_package_dict['id'] assert third_obj.package, third_obj.package_id == first_package_dict['id'] - assert third_obj.current == True - assert second_obj.current == False - assert first_obj.current == False + assert third_obj.current is True + assert second_obj.current is False + assert first_obj.current is False def test_harvest_different_sources_same_document_but_deleted_inbetween(self): @@ -684,7 +692,7 @@ def test_harvest_different_sources_same_document_but_deleted_inbetween(self): # Package was created assert first_package_dict assert first_package_dict['state'] == u'active' - assert first_obj.current == True + assert first_obj.current is True # Delete/withdraw the package first_package_dict = get_action('package_delete')(self.context, {'id': first_obj.package_id}) @@ -707,8 +715,8 @@ def test_harvest_different_sources_same_document_but_deleted_inbetween(self): # It would be good if the package was updated, but we see that it isn't assert second_package_dict, first_package_dict['id'] == second_package_dict['id'] assert not second_obj.package - assert second_obj.current == False - assert first_obj.current == True + assert second_obj.current is False + assert first_obj.current is True def test_harvest_moves_sources(self): @@ -729,7 +737,7 @@ def test_harvest_moves_sources(self): # Package was created assert first_package_dict assert first_package_dict['state'] == u'active' - assert first_obj.current == True + assert first_obj.current is True # Harvest the same document GUID but with a newer date, from another source. source2_fixture = { @@ -748,8 +756,8 @@ def test_harvest_moves_sources(self): # Now we have two packages assert second_package_dict, first_package_dict['id'] == second_package_dict['id'] assert second_obj.package - assert second_obj.current == True - assert first_obj.current == True + assert second_obj.current is True + assert first_obj.current is True # so currently, if you move a Gemini between harvest sources you need # to update the date to get it to reharvest, and then you should # withdraw the package relating to the original harvest source. 
@@ -772,7 +780,7 @@ def test_harvest_import_command(self): # Package was created assert before_package_dict - assert first_obj.current == True + assert first_obj.current is True assert first_obj.package # Create and run two more jobs, the package should not be updated @@ -796,9 +804,9 @@ def test_harvest_import_command(self): # Package was updated, and the current object remains the same assert after_package_dict, before_package_dict['id'] == after_package_dict['id'] - assert third_obj.current == False - assert second_obj.current == False - assert first_obj.current == True + assert third_obj.current is False + assert second_obj.current is False + assert first_obj.current is True source_dict = get_action('harvest_source_show')(self.context, {'id': source.id}) assert source_dict['status']['total_datasets'] == 1 @@ -906,11 +914,13 @@ def test_clean_tags(self): e269743a-cfda-4632-a939-0c8416ae801e - service + service
''' GUID = 'e269743a-cfda-4632-a939-0c8416ae801e' -GEMINI_MISSING_GUID = '''''' +GEMINI_MISSING_GUID = '''''' class TestGatherMethods(HarvestFixtureBase): @@ -1123,7 +1133,8 @@ def test_11_service_fail_gemini_schematron(self): errors = self.get_validation_errors('11_Service_Invalid_GEMINI_Service_Type.xml') assert len(errors) > 0 assert_in("Service type shall be one of" - " 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other' following INSPIRE generic names.", errors) + " 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other' " + "following INSPIRE generic names.", errors) def test_12_service_valid(self): errors = self.get_validation_errors('12_Service_Valid.xml') @@ -1133,4 +1144,5 @@ def test_13_dataset_fail_iso19139_schema_2(self): # This test Dataset has srv tags and only Service metadata should. errors = self.get_validation_errors('13_Dataset_Invalid_Element_srv.xml') assert len(errors) > 0 - assert_in('Element \'{http://www.isotc211.org/2005/srv}SV_ServiceIdentification\': This element is not expected.', errors) + assert_in('Element \'{http://www.isotc211.org/2005/srv}SV_ServiceIdentification\': ' + 'This element is not expected.', errors) diff --git a/ckanext/spatial/tests/test_validation.py b/ckanext/spatial/tests/test_validation.py index faadbd36..7f95bd94 100644 --- a/ckanext/spatial/tests/test_validation.py +++ b/ckanext/spatial/tests/test_validation.py @@ -121,14 +121,21 @@ def test_13_dataset_fail_iso19139_schema_2(self): 'gemini2.1/validation/13_Dataset_Invalid_Element_srv.xml') assert len(errors) > 0 assert_in('(gmx.xsd)', errors) - assert_in('Element \'{http://www.isotc211.org/2005/srv}SV_ServiceIdentification\': This element is not expected.', errors) + assert_in('Element \'{http://www.isotc211.org/2005/srv}SV_ServiceIdentification\': ' + 'This element is not expected.', errors) def test_schematron_error_extraction(self): validation_error_xml = ''' - + - Service type shall be one of 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other' following INSPIRE generic names. + Service type shall be one of 'discovery', 'view', 'download', 'transformation', + 'invoke' or 'other' following INSPIRE generic names. 
diff --git a/ckanext/spatial/validation/validation.py b/ckanext/spatial/validation/validation.py index 634ac277..461d183b 100644 --- a/ckanext/spatial/validation/validation.py +++ b/ckanext/spatial/validation/validation.py @@ -223,7 +223,7 @@ def is_valid(cls, xml): error_details = [] for error in errors: message, details = cls.extract_error_details(error) - if not message in messages_already_reported: + if message not in messages_already_reported: # TODO: perhaps can extract the source line from the # error location error_details.append((details, None)) From 4aecd6c66656424d4744bd4f57f488017bfa64b3 Mon Sep 17 00:00:00 2001 From: Jari Voutilainen Date: Thu, 28 Feb 2019 13:10:22 +0200 Subject: [PATCH 07/12] Restore imports for avoiding side-effects --- ckanext/spatial/model/__init__.py | 3 +++ ckanext/spatial/validation/__init__.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/ckanext/spatial/model/__init__.py b/ckanext/spatial/model/__init__.py index 2e2033b3..23c51962 100644 --- a/ckanext/spatial/model/__init__.py +++ b/ckanext/spatial/model/__init__.py @@ -5,3 +5,6 @@ except ImportError: import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) + +from package_extent import * # noqa +from harvested_metadata import * # noqa diff --git a/ckanext/spatial/validation/__init__.py b/ckanext/spatial/validation/__init__.py index 2e2033b3..56b6477e 100644 --- a/ckanext/spatial/validation/__init__.py +++ b/ckanext/spatial/validation/__init__.py @@ -5,3 +5,5 @@ except ImportError: import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) + +from validation import * # noqa From 7f6953bb8fbaad01d5d91e3f0aad07a99530cf75 Mon Sep 17 00:00:00 2001 From: Jari Voutilainen Date: Thu, 28 Feb 2019 13:22:42 +0200 Subject: [PATCH 08/12] Restore imports as they were deleted too quickly --- ckanext/spatial/geoalchemy_common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ckanext/spatial/geoalchemy_common.py b/ckanext/spatial/geoalchemy_common.py index 6efef52c..fed30449 100644 --- a/ckanext/spatial/geoalchemy_common.py +++ b/ckanext/spatial/geoalchemy_common.py @@ -13,6 +13,7 @@ if toolkit.check_ckan_version(min_version='2.3'): # CKAN >= 2.3, use GeoAlchemy2 + from geoalchemy2.elements import WKTElement # noqa from geoalchemy2 import Geometry from sqlalchemy import func ST_Transform = func.ST_Transform @@ -22,6 +23,7 @@ else: # CKAN < 2.3, use GeoAlchemy + from geoalchemy import WKTSpatialElement as WKTElement # noqa from geoalchemy import functions ST_Transform = functions.transform ST_Equals = functions.equals From 7bf043dafdd9c7e4159861a2c22de9be021c8d66 Mon Sep 17 00:00:00 2001 From: Jari Voutilainen Date: Thu, 28 Feb 2019 13:27:24 +0200 Subject: [PATCH 09/12] Exclude ckanext-harvest in flake8 test --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3ff6b014..6b061cd6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,6 +41,6 @@ jobs: script: - flake8 --version # stop the build if there are Python syntax errors or undefined names - - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude ckan + - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude ckan,ckanext-harvest # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - - flake8 . --count --max-line-length=127 --statistics --exclude ckan + - flake8 . 
--count --max-line-length=127 --statistics --exclude ckan,ckanext-harvest From 4668cd544e56ffdab9a3135ed92fcead969da710 Mon Sep 17 00:00:00 2001 From: Jari Voutilainen Date: Thu, 28 Feb 2019 13:47:59 +0200 Subject: [PATCH 10/12] Ignore long lines in test xml --- ckanext/spatial/tests/test_harvest.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ckanext/spatial/tests/test_harvest.py b/ckanext/spatial/tests/test_harvest.py index 84dd8a4a..b3ec2068 100644 --- a/ckanext/spatial/tests/test_harvest.py +++ b/ckanext/spatial/tests/test_harvest.py @@ -914,13 +914,11 @@ def test_clean_tags(self): e269743a-cfda-4632-a939-0c8416ae801e - service + service -''' +''' # noqa GUID = 'e269743a-cfda-4632-a939-0c8416ae801e' -GEMINI_MISSING_GUID = '''''' +GEMINI_MISSING_GUID = '''''' # noqa class TestGatherMethods(HarvestFixtureBase): From 302517fca3752c99bca026d31690456f5c80b438 Mon Sep 17 00:00:00 2001 From: Jari Voutilainen Date: Thu, 28 Feb 2019 13:59:51 +0200 Subject: [PATCH 11/12] Add missing space --- ckanext/spatial/tests/test_harvest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/spatial/tests/test_harvest.py b/ckanext/spatial/tests/test_harvest.py index b3ec2068..5e5d22f9 100644 --- a/ckanext/spatial/tests/test_harvest.py +++ b/ckanext/spatial/tests/test_harvest.py @@ -469,7 +469,7 @@ def test_harvest_error_validation(self): message = obj.errors[0].message assert_in('One email address shall be provided', message) - assert_in("Service type shall be one of 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other'" + assert_in("Service type shall be one of 'discovery', 'view', 'download', 'transformation', 'invoke' or 'other' " "following INSPIRE generic names", message) assert_in('Limitations on public access code list value shall be \'otherRestrictions\'', message) assert_in('One organisation name shall be provided', message) From 779714c9b5c710e865d0af2a03bfbba30a63c5fb Mon Sep 17 00:00:00 2001 From: Jari Voutilainen Date: Fri, 15 Mar 2019 10:38:26 +0200 Subject: [PATCH 12/12] add missing imports --- ckanext/spatial/harvesters/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ckanext/spatial/harvesters/__init__.py b/ckanext/spatial/harvesters/__init__.py index 2e2033b3..4f129fb5 100644 --- a/ckanext/spatial/harvesters/__init__.py +++ b/ckanext/spatial/harvesters/__init__.py @@ -5,3 +5,9 @@ except ImportError: import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) + +from ckanext.spatial.harvesters.csw import CSWHarvester +from ckanext.spatial.harvesters.waf import WAFHarvester +from ckanext.spatial.harvesters.doc import DocHarvester + +__all__ = ['CSWHarvester', 'WAFHarvester', 'DocHarvester']
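
A note on the import-restoring patches (07/12, 08/12 and 12/12): all three put back imports that the initial flake8 cleanup had removed as apparently unused. Such removals are unsafe in package __init__.py files for two reasons. First, a module may be imported purely for its side effects, which is the stated motivation for patch 07 ("Restore imports for avoiding side-effects"). Second, the package may be re-exporting names that callers import from the package itself rather than from the defining submodule, which patch 12's __all__ list makes explicit. The # noqa markers keep flake8 from flagging the same lines again on the next run.

Below is a minimal sketch of the pattern, using a hypothetical package mypkg rather than the real ckanext-spatial layout:

    # mypkg/harvesters/__init__.py
    #
    # These imports look unused to flake8 (pyflakes code F401), but they run
    # any import-time side effects in the submodules and let callers write
    # `from mypkg.harvesters import CSWHarvester` instead of reaching into
    # `mypkg.harvesters.csw` directly.
    from mypkg.harvesters.csw import CSWHarvester  # noqa: F401
    from mypkg.harvesters.waf import WAFHarvester  # noqa: F401

    # Explicit re-export list; this also pins down exactly what
    # `from mypkg.harvesters import *` exposes.
    __all__ = ['CSWHarvester', 'WAFHarvester']

The bare # noqa used in the patches silences every check on the line; the code-qualified form # noqa: F401 shown in the sketch silences only the unused-import warning, which is usually the safer choice when the line might pick up other problems later.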