diff --git a/README.rst b/README.rst index e286af2..edf8c43 100644 --- a/README.rst +++ b/README.rst @@ -136,3 +136,25 @@ To publish a new version to PyPI follow these steps: git tag 0.0.2 git push --tags + +-------------------------------------- +NISRA JSON (Restful) Harvester Mapping +-------------------------------------- + +The following is a mapping of the NISRA JSON to the DCAT schema before it is ingested into CKAN. + +``` +title <-- label +name <-- extension.matrix +description <-- note[0] +identifier <-- extension.matrix +modified <-- updated +language <-- en +distribution <-- link.alternate +distribution.title <-- link.alternate[i].type.split("/")[1] +distribution.accessURL <-- link.alternate[i].href +distribution.downloadURL <-- link.alternate[i].href +distribution.format <-- link.alternate[i].type +``` + + diff --git a/ckanext/opendatani/assets/css/theme.css b/ckanext/opendatani/assets/css/theme.css index c529990..ca81270 100644 --- a/ckanext/opendatani/assets/css/theme.css +++ b/ckanext/opendatani/assets/css/theme.css @@ -29,12 +29,17 @@ body, background-image: none; background-color: inherit; } -.masthead .nav-collapse, + +.navbar { + border: none; /* Overrides main.css */ +} + +.masthead .navbar-collapse, .masthead .section, .navbar .nav { float: none; } -.masthead .nav-collapse { +.masthead .navbar-collapse { padding: 0; } html, @@ -225,6 +230,11 @@ li { width: 223px; } @media (min-width: 768px) { + + .module.context-info > .module-content { + padding-top: 33px; + } + [role="main"] .wrapper .media-grid .media-item { width: 223px; } @@ -324,17 +334,18 @@ h6 { } .masthead { color: #4d4d4d; + padding: 0; /* Overrides main.css */ } .masthead .logo { display: block; width: 282px; margin: 22px 0; } -.masthead .nav-collapse { +.masthead .navbar-collapse { background-color: #e8eaed; clear: both; position: static; - margin-left: -9999rem; + margin-left: -9998rem; margin-right: -9999rem; padding-left: 9999rem; padding-right: 9999rem; @@ -355,6 +366,7 @@ h6 { 
text-align: center; line-height: 30px; color: rgba(77, 77, 77, 0.25); + left: 10px; } .masthead .site-search label:after { display: none; @@ -371,7 +383,7 @@ h6 { } .masthead .site-search input { padding: 12px 50px 12px 45px; - width: 210px; + width: 300px; background-color: #e8eaed; border-radius: 0; border-color: #e8eaed; @@ -410,6 +422,7 @@ h6 { font-weight: bold; border-radius: 0; position: relative; + text-decoration: none; /* Overrides main.css */ } .masthead .nav > li > a:hover, .masthead .navigation .nav-pills > li > a:hover { @@ -447,8 +460,14 @@ h6 { border-top: solid 1px #dfe2e6; } } + +.homepage .featured-groups { + padding: 0; +} + .homepage .featured-groups li { background-color: #25374d; + margin-bottom: 20px; } .homepage .featured-groups li a { display: table; @@ -459,7 +478,7 @@ h6 { } @media (min-width: 768px) { .homepage .featured-groups li a { - height: 114px; + height: 134px; overflow: hidden; } } @@ -1048,11 +1067,15 @@ h6 { /* Custom UI Homepage updates */ .latest-posts.thumbnails { margin-top: 30px; + display: flex; + gap: 17px; + padding-left: 0; } + .latest-posts.thumbnails li { border: 1px solid #eee; - margin-left: 17px; min-height: 501px; + padding: 0; } .latest-posts.thumbnails li .post { padding: 10px; @@ -1103,6 +1126,12 @@ h6 { height: 160px; overflow: hidden; } + +/* Overrides main.css */ +.thumb img { + width: 100%; + height: auto; +} @media screen and (max-width: 768px) { .latest-posts .span4 { width: 100%; @@ -1110,4 +1139,54 @@ h6 { .mar-l-0 { margin-left: 0 !important; } + + .latest-posts.thumbnails { + flex-direction: column; + } + +} + +@media screen and (max-width: 1000px) { + .masthead .navbar-collapse, .toolbar { + padding: 5px 0; + margin: 0; + max-width: calc(100% + 30px); + width: calc(100% + 30px); + padding-left: 15px; + margin-left: -15px; + } + + .module-content.page-header { + margin: 0; + padding: 0; + } +} + +.module-content.page-header { + background: none; + border: none; +} + +.masthead .navigation .nav-pills 
li a:hover { + background-color: #dfe2e6; +} + +.masthead .navigation .nav-pills li a:focus, .masthead .navigation .nav-pills li.active a { + background: transparent; +} + +/* Overriding main.css of dev here */ +.wrapper { + border: none; + -webkit-box-shadow: none; + box-shadow: none; + background-color: transparent; +} + +.module-content { + padding: 30px 0; } + +.form-horizontal .form-group { + margin-left: 0; +} \ No newline at end of file diff --git a/ckanext/opendatani/dataset_schema.json b/ckanext/opendatani/dataset_schema.json index 486933f..27e402f 100644 --- a/ckanext/opendatani/dataset_schema.json +++ b/ckanext/opendatani/dataset_schema.json @@ -33,6 +33,10 @@ "validators": "scheming_required at_least_n_choices(1) scheming_multiple_choice", "output_validators": "scheming_multiple_choice_output", "choices": [ + { + "value": "governmentstatistics", + "label": "Government statistics" + }, { "value": "farming", "label": "Farming" @@ -205,6 +209,25 @@ "required": true, "validators": "boolean_validator datasets_with_no_organization_cannot_be_private opendatani_private_datasets", "form_snippet": "visibility_form_field.html" + }, + { + "field_name": "source_last_updated", + "preset": "datetime", + "label": "Source last updated" + }, + { + "field_name": "time_period", + "label": "Time period" + }, + { + "field_name": "title_tags", + "label": "Title+Tags" + }, + { + "field_name": "metatags", + "label": "Meta Tags", + "form_snippet": "markdown.html", + "form_placeholder": "Some useful json element" } ], "resource_fields": [ diff --git a/ckanext/opendatani/fanstatic/css/theme.css b/ckanext/opendatani/fanstatic/css/theme.css index c529990..fceb1e6 100644 --- a/ckanext/opendatani/fanstatic/css/theme.css +++ b/ckanext/opendatani/fanstatic/css/theme.css @@ -29,12 +29,12 @@ body, background-image: none; background-color: inherit; } -.masthead .nav-collapse, +.masthead .navbar-collapse, .masthead .section, .navbar .nav { float: none; } -.masthead .nav-collapse { 
+.masthead .navbar-collapse { padding: 0; } html, @@ -330,7 +330,7 @@ h6 { width: 282px; margin: 22px 0; } -.masthead .nav-collapse { +.masthead .navbar-collapse { background-color: #e8eaed; clear: both; position: static; @@ -410,6 +410,7 @@ h6 { font-weight: bold; border-radius: 0; position: relative; + text-decoration: none; } .masthead .nav > li > a:hover, .masthead .navigation .nav-pills > li > a:hover { @@ -459,7 +460,7 @@ h6 { } @media (min-width: 768px) { .homepage .featured-groups li a { - height: 114px; + height: 134px; overflow: hidden; } } diff --git a/ckanext/opendatani/fanstatic/less/_reset.less b/ckanext/opendatani/fanstatic/less/_reset.less index 6e345a2..07b596a 100644 --- a/ckanext/opendatani/fanstatic/less/_reset.less +++ b/ckanext/opendatani/fanstatic/less/_reset.less @@ -8,13 +8,13 @@ body, background-color:inherit; } -.masthead .nav-collapse, +.masthead .navbar-collapse, .masthead .section, .navbar .nav { float:none; } -.masthead .nav-collapse { +.masthead .navbar-collapse { padding:0; } diff --git a/ckanext/opendatani/json_dcat.py b/ckanext/opendatani/json_dcat.py new file mode 100644 index 0000000..07231c2 --- /dev/null +++ b/ckanext/opendatani/json_dcat.py @@ -0,0 +1,615 @@ +from builtins import str +import json +import logging +from hashlib import sha1 +import traceback +import uuid + +import requests +import rdflib +import os + +from ckan import model +from ckan import logic +from ckan import plugins as p +from ckanext.harvest.model import HarvestObject, HarvestObjectExtra + +from ckanext.dcat import converters +from ckanext.dcat.harvesters.base import DCATHarvester +from sqlalchemy.orm import Query +import datetime +import six +from ckanext.dcat.interfaces import IDCATRDFHarvester +import re + +log = logging.getLogger(__name__) + +def convert_to_html(text): + """Converts text with formatting to HTML. + + Args: + text: The text to be converted. + + Returns: + The converted HTML string. 
+ """ + # Replace bold tags + text = text.replace("[b]", "").replace("[/b]", "") + + # Replace URL links + url_pattern = r'\[url=(https?://[^\]]+|mailto:[^\]]+)\](.*?)\[/url\]' + replacement = r'[\2](\1)' + text = re.sub(url_pattern, replacement, text) + + return text + +def _remove_extra(key, dataset_dict): + dataset_dict['extras'][:] = [e + for e in dataset_dict['extras'] + if e['key'] != key] + +class NsiraJSONHarvester(DCATHarvester): + + def info(self): + return { + 'name': 'nsira_dcatjson', + 'title': 'NSIRA JSON (Restful) Harvester', + 'description': 'Harvester for Restful dataset descriptions ' + + 'serialized as JSON' + } + + def _read_datasets_from_db(self, guid): + ''' + Returns a database result of datasets matching the given guid. + ''' + + datasets = model.Session.query(model.Package.id) \ + .join(model.PackageExtra) \ + .filter(model.PackageExtra.key == 'guid') \ + .filter(model.PackageExtra.value == guid) \ + .filter(model.Package.state == 'active') \ + .all() + return datasets + + def _get_existing_dataset(self, guid): + ''' + Checks if a dataset with a certain guid extra already exists + + Returns a dict as the ones returned by package_show + ''' + + datasets = self._read_datasets_from_db(guid) + + if not datasets: + return None + elif len(datasets) > 1: + log.error('Found more than one dataset with the same guid: {0}' + .format(guid)) + + return p.toolkit.get_action('package_show')({}, {'id': datasets[0][0]}) + + def _get_content_and_type(self, url, harvest_job, page=1, + content_type=None): + ''' + Gets the content and type of the given url. 
+ + :param url: a web url (starting with http) or a local path + :param harvest_job: the job, used for error reporting + :param page: adds paging to the url + :param content_type: will be returned as type + :return: a tuple containing the content and content-type + ''' + + if not url.lower().startswith('http'): + # Check local file + if os.path.exists(url): + with open(url, 'r') as f: + content = f.read() + content_type = content_type or rdflib.util.guess_format(url) + return content, content_type + else: + self._save_gather_error('Could not get content for this url', + harvest_job) + return None, None + + try: + + if page > 1: + url = url + '&' if '?' in url else url + '?' + url = url + 'page={0}'.format(page) + + log.debug('Getting file %s', url) + + # get the `requests` session object + session = requests.Session() + for harvester in p.PluginImplementations(IDCATRDFHarvester): + session = harvester.update_session(session) + + # first we try a HEAD request which may not be supported + did_get = False + r = session.head(url) + + if r.status_code == 405 or r.status_code == 400: + r = session.get(url, stream=True) + did_get = True + r.raise_for_status() + + cl = r.headers.get('content-length') + if cl and int(cl) > self.MAX_FILE_SIZE: + msg = '''Remote file is too big. 
Allowed + file size: {allowed}, Content-Length: {actual}.'''.format( + allowed=self.MAX_FILE_SIZE, actual=cl) + self._save_gather_error(msg, harvest_job) + return None, None + + if not did_get: + r = session.get(url, stream=True) + + length = 0 + content = '' if six.PY2 else b'' + for chunk in r.iter_content(chunk_size=self.CHUNK_SIZE): + content = content + chunk + + length += len(chunk) + + if length >= self.MAX_FILE_SIZE: + self._save_gather_error('Remote file is too big.', + harvest_job) + return None, None + + if not six.PY2: + content = content.decode('utf-8') + + if content_type is None and r.headers.get('content-type'): + content_type = r.headers.get('content-type').split(";", 1)[0] + + + # if content is a JSON array of URLS, fetch each url + try: + urls = json.loads(content) + if isinstance(urls, list) and all(isinstance(u, str) for u in urls): + combined_content = [] + for package_url in urls: + package_content, _ = self._get_content_and_type(package_url, harvest_job) + if package_content: + combined_content.append(json.loads(package_content)) + content = json.dumps(combined_content).encode('utf-8') + content_type = 'application/json' + if not six.PY2: + content = content.decode('utf-8') + except json.JSONDecodeError: + self._save_gather_error('Could not parse content as JSON', harvest_job) + return None, None + + + return content, content_type + + except requests.exceptions.HTTPError as error: + if page > 1 and error.response.status_code == 404: + # We want to catch these ones later on + raise + + msg = 'Could not get content from %s. Server responded with %s %s'\ + % (url, error.response.status_code, error.response.reason) + self._save_gather_error(msg, harvest_job) + return None, None + except requests.exceptions.ConnectionError as error: + msg = '''Could not get content from %s because a + connection error occurred. 
%s''' % (url, error) + self._save_gather_error(msg, harvest_job) + return None, None + except requests.exceptions.Timeout as error: + msg = 'Could not get content from %s because the connection timed'\ + ' out.' % url + self._save_gather_error(msg, harvest_job) + return None, None + + + + def _get_guids_and_datasets(self, content): + doc = json.loads(content) + + + if isinstance(doc, list): + # Assume a list of datasets + datasets = doc + elif isinstance(doc, dict): + datasets = [doc] + else: + raise ValueError('Wrong JSON object') + + + frequency = { + "TLIST(A1)": "annually", + "TLIST(Q1)": "quarterly", + "TLIST(M1)": "monthly", + } + + for dataset in datasets: + filtered_keys = [key for key in dataset["dimension"] if key not in ("STATISTIC", "TLIST(A1)")] + labels = [dataset["dimension"][key]["label"] for key in filtered_keys] + + if len(labels) == 1: + output_string = labels[0] + else: + output_string = " by ".join(labels[:-1]) + " and " + labels[-1] + + + # get Tlist from dataset using keys in frquency + frequency_key = [key for key in dataset["dimension"] if key in ("TLIST(A1)", "TLIST(Q1)", "TLIST(M1)")] + frequency_key = frequency_key[0] + frequency_value = dataset["dimension"][frequency_key]["category"]["index"] + time_period = f"{frequency_value[0]} - {frequency_value[-1]}" + allowed_keys = {"exceptional", "official", "reservation", "archive", "experimental", "analytical"} + tags = {k: v for k, v in dataset["extension"].items() if not isinstance(v, dict) and k in allowed_keys} + + + dataset_copy = { + "title": dataset['label'], + "titleTags": dataset['label'] + " "+ "by " + output_string, + "name": dataset['extension']['matrix'], + "description": convert_to_html(dataset['note'][0]), + "identifier": dataset['extension']['matrix'], + "modified": dataset['updated'], + "landingPage": "", + "publisher": { + "name": dataset['extension']['contact'].get('name', ''), + "mbox": dataset['extension']['contact'].get('email', '') + }, + "fn": 
dataset['extension']['contact'].get('name', 'not-provided'), + "hasEmail": dataset['extension']['contact'].get('email', 'notprovided@mail.com'), + + "language": [ + "en" + ], + "distribution": [], + "frequency": frequency[frequency_key], + "timePeriod": time_period, + "metaTags": json.dumps(tags), + } + + for resource in dataset['link']['alternate']: + if resource['type'] == "application/base64": + dataset_copy['distribution'].append({ + 'title': "Xlsx", + 'accessURL': resource['href'], + 'downloadURL': resource['href'], + 'format': "xlsx" + }) + + elif resource['type'] == "application/json": + dataset_copy['distribution'].append({ + 'title': f"JSON {resource['href'].split('/')[-2]}", + 'accessURL': resource['href'], + 'downloadURL': resource['href'], + 'format': f"json{resource['href'].split('/')[-2]}" + }) + + else: + dataset_copy['distribution'].append({ + 'title': resource['type'].split("/")[1], + 'accessURL': resource['href'], + 'downloadURL': resource['href'], + 'format': resource['type'] + }) + + + + as_string = json.dumps(dataset_copy) + # Get identifier + guid = dataset_copy.get('identifier') + if not guid: + # This is bad, any ideas welcomed + guid = sha1(as_string).hexdigest() + + yield guid, as_string + + def _get_package_dict(self, harvest_object): + + content = harvest_object.content + + dcat_dict = json.loads(content) + + package_dict = converters.dcat_to_ckan(dcat_dict) + + package_dict['name'] = dcat_dict['name'].lower() + + return package_dict, dcat_dict + + def gather_stage(self, harvest_job): + log.debug('In DCATJSONHarvester gather_stage steve') + + ids = [] + + # Get the previous guids for this source + query = \ + model.Session.query(HarvestObject.guid, HarvestObject.package_id) \ + .filter(HarvestObject.current == True) \ + .filter(HarvestObject.harvest_source_id == harvest_job.source.id) + guid_to_package_id = {} + + for guid, package_id in query: + guid_to_package_id[guid] = package_id + + guids_in_db = list(guid_to_package_id.keys()) + 
guids_in_source = [] + + # Get file contents + url = harvest_job.source.url + + previous_guids = [] + page = 1 + while True: + + try: + content, content_type = \ + self._get_content_and_type(url, harvest_job, page) + except requests.exceptions.HTTPError as error: + if error.response.status_code == 404: + if page > 1: + # Server returned a 404 after the first page, no more + # records + log.debug('404 after first page, no more pages') + break + else: + # Proper 404 + msg = 'Could not get content. Server responded with ' \ + '404 Not Found' + self._save_gather_error(msg, harvest_job) + return None + else: + # This should never happen. Raising just in case. + raise + + if not content: + return None + + try: + + batch_guids = [] + for guid, as_string in self._get_guids_and_datasets(content): + log.debug('Got identifier: {0}' + .format(guid.encode('utf8'))) + + batch_guids.append(guid) + + if guid not in previous_guids: + + if guid in guids_in_db: + # Dataset needs to be udpated + obj = HarvestObject( + guid=guid, job=harvest_job, + package_id=guid_to_package_id[guid], + content=as_string, + extras=[HarvestObjectExtra(key='status', + value='change')]) + else: + # Dataset needs to be created + obj = HarvestObject( + guid=guid, job=harvest_job, + content=as_string, + extras=[HarvestObjectExtra(key='status', + value='new')]) + obj.save() + ids.append(obj.id) + + if len(batch_guids) > 0: + guids_in_source.extend(set(batch_guids) + - set(previous_guids)) + else: + log.debug('Empty document, no more records') + # Empty document, no more ids + break + + except ValueError as e: + msg = 'Error parsing file: {0}'.format(str(e)) + self._save_gather_error(msg, harvest_job) + return None + + if sorted(previous_guids) == sorted(batch_guids): + # Server does not support pagination or no more pages + log.debug('Same content, no more pages') + break + + page = page + 1 + + previous_guids = batch_guids + + # Check datasets that need to be deleted + guids_to_delete = set(guids_in_db) - 
set(guids_in_source) + for guid in guids_to_delete: + obj = HarvestObject( + guid=guid, job=harvest_job, + package_id=guid_to_package_id[guid], + extras=[HarvestObjectExtra(key='status', value='delete')]) + ids.append(obj.id) + model.Session.query(HarvestObject).\ + filter_by(guid=guid).\ + update({'current': False}, False) + obj.save() + + return ids + + def fetch_stage(self, harvest_object): + return True + + def import_stage(self, harvest_object): + log.debug('In DCATJSONHarvester import_stage') + if not harvest_object: + log.error('No harvest object received') + return False + + if self.force_import: + status = 'change' + else: + status = self._get_object_extra(harvest_object, 'status') + + if status == 'delete': + # Delete package + context = {'model': model, 'session': model.Session, + 'user': self._get_user_name()} + + p.toolkit.get_action('package_delete')( + context, {'id': harvest_object.package_id}) + log.info('Deleted package {0} with guid {1}' + .format(harvest_object.package_id, harvest_object.guid)) + + return True + + if harvest_object.content is None: + self._save_object_error( + 'Empty content for object %s' % harvest_object.id, + harvest_object, 'Import') + return False + + # Get the last harvested object (if any) + previous_object = model.Session.query(HarvestObject) \ + .filter(HarvestObject.guid == harvest_object.guid) \ + .filter(HarvestObject.current == True) \ + .first() + + # Flag previous object as not current anymore + if previous_object and not self.force_import: + previous_object.current = False + previous_object.add() + + + package_dict, dcat_dict = self._get_package_dict(harvest_object) + + + if not package_dict: + return False + + if not package_dict.get('name'): + package_dict['name'] = \ + self._get_package_name(harvest_object, package_dict['title']) + + # copy across resource ids from the existing dataset, otherwise they'll + # be recreated with new ids + if status == 'change': + existing_dataset = 
self._get_existing_dataset(harvest_object.guid) + if existing_dataset: + copy_across_resource_ids(existing_dataset, package_dict) + + # Allow custom harvesters to modify the package dict before creating + # or updating the package + package_dict = self.modify_package_dict(package_dict, + dcat_dict, + harvest_object) + # Unless already set by an extension, get the owner organization (if + # any) from the harvest source dataset + if not package_dict.get('owner_org'): + source_dataset = model.Package.get(harvest_object.source.id) + if source_dataset.owner_org: + package_dict['owner_org'] = source_dataset.owner_org + + # Flag this object as the current one + harvest_object.current = True + harvest_object.add() + + context = { + 'user': self._get_user_name(), + 'return_id_only': True, + 'ignore_auth': True, + } + + try: + if status == 'new': + package_schema = logic.schema.default_create_package_schema() + + context['schema'] = package_schema + + # We need to explicitly provide a package ID + package_dict['id'] = str(uuid.uuid4()) + package_schema['id'] = [str] + + # Save reference to the package on the object + harvest_object.package_id = package_dict['id'] + harvest_object.add() + + # Defer constraints and flush so the dataset can be indexed with + # the harvest object id (on the after_show hook from the harvester + # plugin) + model.Session.execute( + 'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') + model.Session.flush() + + elif status == 'change': + package_dict['id'] = harvest_object.package_id + + if status in ['new', 'change']: + action = 'package_create' if status == 'new' else 'package_update' + message_status = 'Created' if status == 'new' else 'Updated' + package_dict['frequency'] = dcat_dict.get('frequency', '') + package_dict['topic_category'] = 'governmentstatistics' + package_dict['lineage'] = 'NISRA' + package_dict['contact_name'] = dcat_dict.get('fn', '') + package_dict['contact_email'] = dcat_dict.get('hasEmail', '') + 
package_dict['license_id'] = 'uk-ogl' + package_dict['source_last_updated'] = dcat_dict.get('modified', '')[:19].replace('.', '') + package_dict['time_period'] = dcat_dict.get('timePeriod', '') + package_dict['title_tags'] = dcat_dict.get('titleTags', '') + package_dict['metatags'] = dcat_dict.get('metaTags', '') + package_id = p.toolkit.get_action(action)(context, package_dict) + log.info('%s dataset with id %s', message_status, package_id) + + except Exception as e: + dataset = json.loads(harvest_object.content) + dataset_name = dataset.get('name', '') + + self._save_object_error('Error importing dataset %s: %r / %s' % (dataset_name, e, traceback.format_exc()), harvest_object, 'Import') + return False + + finally: + model.Session.commit() + + return True + +def copy_across_resource_ids(existing_dataset, harvested_dataset): + '''Compare the resources in a dataset existing in the CKAN database with + the resources in a freshly harvested copy, and for any resources that are + the same, copy the resource ID into the harvested_dataset dict. + ''' + # take a copy of the existing_resources so we can remove them when they are + # matched - we don't want to match them more than once. + existing_resources_still_to_match = \ + [r for r in existing_dataset.get('resources')] + + # we match resources a number of ways. we'll compute an 'identity' of a + # resource in both datasets and see if they match. + # start with the surest way of identifying a resource, before reverting + # to closest matches. 
+ resource_identity_functions = [ + lambda r: r['uri'], # URI is best + lambda r: (r['url'], r['title'], r['format']), + lambda r: (r['url'], r['title']), + lambda r: r['url'], # same URL is fine if nothing else matches + ] + + for resource_identity_function in resource_identity_functions: + # calculate the identities of the existing_resources + existing_resource_identities = {} + for r in existing_resources_still_to_match: + try: + identity = resource_identity_function(r) + existing_resource_identities[identity] = r + except KeyError: + pass + + # calculate the identities of the harvested_resources + for resource in harvested_dataset.get('resources'): + try: + identity = resource_identity_function(resource) + except KeyError: + identity = None + if identity and identity in existing_resource_identities: + # we got a match with the existing_resources - copy the id + matching_existing_resource = \ + existing_resource_identities[identity] + resource['id'] = matching_existing_resource['id'] + # make sure we don't match this existing_resource again + del existing_resource_identities[identity] + existing_resources_still_to_match.remove( + matching_existing_resource) + if not existing_resources_still_to_match: + break diff --git a/ckanext/opendatani/plugin.py b/ckanext/opendatani/plugin.py index 361f709..5ac8b74 100644 --- a/ckanext/opendatani/plugin.py +++ b/ckanext/opendatani/plugin.py @@ -1,7 +1,7 @@ import datetime # from pylons import config from ckan.common import config -import routes.mapper +# import routes.mapper import logging import ckan.plugins as plugins @@ -171,9 +171,6 @@ def custom_user_create(context, data_dict): def custom_user_update(context, data_dict): - context['schema'] = custom_update_user_schema( - form_schema='password1' in context.get('schema', {})) - return core_user_update(context, data_dict) diff --git a/ckanext/opendatani/templates/ckanext_pages/base_form.html b/ckanext/opendatani/templates/ckanext_pages/base_form.html index 40e5dcc..6b97fd3 
100644 --- a/ckanext/opendatani/templates/ckanext_pages/base_form.html +++ b/ckanext/opendatani/templates/ckanext_pages/base_form.html @@ -30,7 +30,7 @@ {% set slug_prefix = cancel_url ~ '/' %} {% set slug_domain = h.url_for('pages.index', qualified=true) %} {% if page %} - {% set delete_url = h.url_for('pages.pages_delete', page=data.name) %} + {% set delete_url = h.url_for('pages.delete', page=data.name) %} {% endif %} {% endif %} diff --git a/ckanext/opendatani/templates/ckanext_pages/snippets/pages_list.html b/ckanext/opendatani/templates/ckanext_pages/snippets/pages_list.html index e97bb40..28a122b 100644 --- a/ckanext/opendatani/templates/ckanext_pages/snippets/pages_list.html +++ b/ckanext/opendatani/templates/ckanext_pages/snippets/pages_list.html @@ -20,11 +20,11 @@

Pages

{% endif %}
{% if page.image %} -
+ -
+

{{ page.title }} {% if page.publish_date %} @@ -44,7 +44,7 @@

{% endif %}

{% else %} -
+

{{ page.title }} {% if page.publish_date %} diff --git a/ckanext/opendatani/templates/footer.html b/ckanext/opendatani/templates/footer.html index 72178cf..9b86da0 100644 --- a/ckanext/opendatani/templates/footer.html +++ b/ckanext/opendatani/templates/footer.html @@ -2,16 +2,16 @@ {% block footer_content %}
-
+
NI Direct
-
+
Digital NI
-
+
CKAN
-
+
Datopian
diff --git a/ckanext/opendatani/templates/header.html b/ckanext/opendatani/templates/header.html index 4d88189..adb8d65 100644 --- a/ckanext/opendatani/templates/header.html +++ b/ckanext/opendatani/templates/header.html @@ -3,18 +3,18 @@ {% block header_account_logged %}
  • - +
  • - +
  • {% if c.userobj.sysadmin %}
  • - + {{ _('Admin') }}
  • @@ -30,7 +30,7 @@
  • {% set notifications_tooltip = ngettext('Dashboard (%(num)d new item)', 'Dashboard (%(num)d new items)', new_activities+stale_datasets|int) %} - + {{ _('Dashboard') }} {{ new_activities + stale_datasets|int }} @@ -38,7 +38,7 @@ {% block header_account_settings_link %}
  • - + {{ _('Settings') }}
  • @@ -46,11 +46,19 @@ {% block header_account_log_out_link %}
  • - + {{ _('Log out') }}
  • {% endblock %} +{% block header_contacts_link %} +
  • + + + {{ _('Contact') }} + +
  • +{% endblock %} {% endblock %} {% block header_account_notlogged %} @@ -77,13 +85,14 @@
  • {{ _('Contact') }}
  • + {% endblock %} {% block header_site_search %} -
    diff --git a/ckanext/opendatani/templates/home/snippets/group_item.html b/ckanext/opendatani/templates/home/snippets/group_item.html index f1557ab..8f9239e 100644 --- a/ckanext/opendatani/templates/home/snippets/group_item.html +++ b/ckanext/opendatani/templates/home/snippets/group_item.html @@ -1,14 +1,16 @@ {% set type = group.type or 'group' %} {% set url = h.url_for(type ~ '_read', action='read', id=group.name) %} {% block item %} -
  • - - - {{ group.name }} - -

    {{ group.display_name }}

    -
    -
  • + {% endblock %} {% if position is divisibleby 3 %} {% endif %} diff --git a/ckanext/opendatani/templates/organization/edit_base.html b/ckanext/opendatani/templates/organization/edit_base.html index 20d95b2..5e71ea9 100644 --- a/ckanext/opendatani/templates/organization/edit_base.html +++ b/ckanext/opendatani/templates/organization/edit_base.html @@ -1,8 +1,8 @@ {% ckan_extends %} {% block content_primary_nav %} - {{ h.build_nav_icon('organization_edit', _('Edit'), id=c.group_dict.name) }} - {{ h.build_nav_icon('organization_bulk_process', _('Datasets'), id=c.group_dict.name) }} - {{ h.build_nav_icon('organization_members', _('Members'), id=c.group_dict.name) }} - {{ h.build_nav_icon('add_groups', _('Add groups'), id=c.group_dict.name) }} + {{ h.build_nav_icon(group_type + '.edit', _('Edit'), id=group_dict.name, icon='pencil-square-o') }} + {{ h.build_nav_icon(group_type + '.bulk_process', _('Datasets'), id=group_dict.name, icon='sitemap') }} + {{ h.build_nav_icon(group_type + '.members', _('Members'), id=group_dict.name, icon='users') }} + {{ h.build_nav_icon('odni_organization.add_groups', _('Add groups'), id=c.group_dict.name, icon='file') }} {% endblock %} diff --git a/ckanext/opendatani/templates/package/resource_read.html b/ckanext/opendatani/templates/package/resource_read.html new file mode 100644 index 0000000..0174071 --- /dev/null +++ b/ckanext/opendatani/templates/package/resource_read.html @@ -0,0 +1,61 @@ +{% ckan_extends %} {% block primary_content %} +{% block resource_additional_information %} +{% if res %} +
    +
    +

    {{ _('Additional Information') }}

    + + + + + + + + + + + + + + + + + + + + + + + + + + {% for key, value in h.format_resource_items(res.items()) %} {% if key + not in ('created','Created', 'metadata modified', 'last modified', + 'format') %} + + + + + {% endif %} {% endfor %} + +
    {{ _('Field') }}{{ _('Value') }}
    {{ _('Metadata last updated') }} + {{ h.render_datetime(res.metadata_modified) or + h.render_datetime(res.metadata_modified) or _('unknown') }} +
    {{ _('Created') }}{{ h.render_datetime(res.created) or h.render_datetime(res.Created) or _('unknown') }}
    {{ _('Format') }} + {{ res.format or res.mimetype_inner or res.mimetype or _('unknown') + }} +
    {{ _('License') }} + {% snippet "snippets/license.html", pkg_dict=pkg, text_only=True %} +
    {{ key | capitalize }}{{ value }}
    +
    +
    +{% endif %} {% endblock %} {% endblock %} diff --git a/ckanext/opendatani/templates/user/dashboard.html b/ckanext/opendatani/templates/user/dashboard.html index a25e369..c232278 100644 --- a/ckanext/opendatani/templates/user/dashboard.html +++ b/ckanext/opendatani/templates/user/dashboard.html @@ -12,19 +12,19 @@ {% block page_header %} {% endblock %} diff --git a/setup.py b/setup.py index e275997..bf0b7aa 100644 --- a/setup.py +++ b/setup.py @@ -79,6 +79,7 @@ entry_points=''' [ckan.plugins] opendatani=ckanext.opendatani.plugin:OpendataniPlugin + nisra_jsondcat=ckanext.opendatani.json_dcat:NsiraJSONHarvester [paste.paster_command] create_featured_groups=ckanext.opendatani.commands:CreateFeaturedGroups @@ -91,6 +92,7 @@ esri_arcgis_profile=ckanext.opendatani.dcat:EsriArcGISProfile daera_core_profile=ckanext.opendatani.dcat:DaeraCoreProfile nisra_profile=ckanext.opendatani.dcat:NisraProfile + ''', )