From 124e4219c8882d7fab594fbcf64b3b9c0a27d356 Mon Sep 17 00:00:00 2001 From: huberrob Date: Thu, 17 Dec 2020 11:29:37 +0100 Subject: [PATCH 1/3] signposting links --- fuji_server/controllers/fair_check.py | 33 ++++++++++++++----- .../fair_evaluator_minimal_metadata.py | 3 ++ .../fair_evaluator_persistent_identifier.py | 23 +++++++++++-- 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/fuji_server/controllers/fair_check.py b/fuji_server/controllers/fair_check.py index ffd979cc..dd2734a8 100644 --- a/fuji_server/controllers/fair_check.py +++ b/fuji_server/controllers/fair_check.py @@ -105,7 +105,7 @@ def __init__(self, uid, test_debug=False, oaipmh=None, use_datacite=True): self.landing_url = None # url of the landing page of self.pid_url self.landing_html = None self.landing_origin = None # schema + authority of the landing page e.g. https://www.pangaea.de - self.signposting_header_links = dict() + self.signposting_header_links = [] self.pid_scheme = None self.id_scheme= None self.logger = logging.getLogger(self.__class__.__name__) @@ -369,7 +369,7 @@ def retrieve_metadata_embedded(self, extruct_metadata): 'FsF-F3-01M : Found data links in HTML head (link rel=item) : ' + str(len(links))) if self.metadata_merged.get('object_content_identifier') is None: self.metadata_merged['object_content_identifier'] = links - self.metadata_sources.append((MetaDataCollector.Sources.SIGN_POSTING.value,'linked')) + self.metadata_sources.append((MetaDataCollector.Sources.TYPED_LINK.value,'linked')) #Now if an identifier has been detected in the metadata, potentially check for persistent identifier has to be repeated.. if self.metadata_merged.get('object_identifier'): @@ -380,7 +380,8 @@ def retrieve_metadata_embedded(self, extruct_metadata): found_pids_in_metadata.remove('url') found_id = found_pids_in_metadata[0] if found_id in Mapper.VALID_PIDS.value: - self.logger.info('FsF-F1-02D : Found object identifier in metadata, repeating PID check') + self.logger.info('FsF-F2-01M : Found object identifier in metadata, repeating PID check for FsF-F1-02D') + self.logger.log(self.LOG_SUCCESS, 'FsF-F1-02D : Found object identifier in metadata during FsF-F2-01M, PID check was repeated') self.repeat_pid_check = True self.pid_scheme = found_id self.id = self.metadata_merged.get('object_identifier') @@ -405,6 +406,13 @@ def get_html_typed_links(self, rel="item"): datalinks.append({'url': href, 'type': l.attrib.get('type'), 'rel': l.attrib.get('rel'), 'profile': l.attrib.get('format')}) return datalinks + def get_signposting_links(self, rel="item"): + signlinks =[] + for signposting_links in self.signposting_header_links: + if signposting_links.get('rel') == rel: + signlinks.append(signposting_links) + return signlinks + def get_guessed_xml_link(self): # in case object landing page URL ends with '.html' or '/html' # try to find out if there is some xml content if suffix is replaced by 'xml @@ -506,16 +514,25 @@ def retrieve_metadata_external(self): self.logger.info('FsF-F2-01M : Datacite metadata UNAVAILABLE') else: self.logger.info('FsF-F2-01M : Not a PID, therefore Datacite metadata (json) not requested.') - #dcat style + sign_header_links = [] + #signposting header links + if self.get_signposting_links('describedby'): + sign_header_links = self.get_signposting_links('describedby') + self.metadata_sources.append((MetaDataCollector.Sources.SIGN_POSTING.value, 'signposting')) + #dcat style meta links typed_metadata_links = self.get_html_typed_links(rel='alternate') - #ddi style + #ddi style meta links rel_meta_links = self.get_html_typed_links(rel='meta') - #signposting style - sign_metadata_links = self.get_html_typed_links(rel='describedby') + #signposting style meta links + sign_meta_links = self.get_html_typed_links(rel='describedby') + - typed_metadata_links.extend(sign_metadata_links) + + typed_metadata_links.extend(sign_meta_links) typed_metadata_links.extend(rel_meta_links) + typed_metadata_links.extend(sign_header_links) guessed_metadata_link = self.get_guessed_xml_link() + if guessed_metadata_link is not None: typed_metadata_links.append(guessed_metadata_link) diff --git a/fuji_server/evaluators/fair_evaluator_minimal_metadata.py b/fuji_server/evaluators/fair_evaluator_minimal_metadata.py index 8adab963..f8fb62aa 100644 --- a/fuji_server/evaluators/fair_evaluator_minimal_metadata.py +++ b/fuji_server/evaluators/fair_evaluator_minimal_metadata.py @@ -81,6 +81,9 @@ def evaluate(self): if source_mechanism == 'linked': self.setEvaluationCriteriumScore('FsF-F2-01M-1c', 0,'pass') self.setEvaluationCriteriumScore('FsF-F2-01M-1', 0, 'pass') + if source_mechanism == 'signposting': + self.setEvaluationCriteriumScore('FsF-F2-01M-1d', 0,'pass') + self.setEvaluationCriteriumScore('FsF-F2-01M-1', 0, 'pass') self.result.test_status = test_status self.result.metric_tests = self.metric_tests self.result.score = self.score diff --git a/fuji_server/evaluators/fair_evaluator_persistent_identifier.py b/fuji_server/evaluators/fair_evaluator_persistent_identifier.py index 47bec67b..2626dd49 100644 --- a/fuji_server/evaluators/fair_evaluator_persistent_identifier.py +++ b/fuji_server/evaluators/fair_evaluator_persistent_identifier.py @@ -56,19 +56,38 @@ def evaluate(self): # identify signposting links in header header_link_string = requestHelper.getHTTPResponse().getheader('Link') if header_link_string is not None: + self.logger.info('FsF-F1-02D : Found signposting links in response header of landingpage') + for preparsed_link in header_link_string.split(','): + found_link = None + found_type, type_match = None, None + found_rel, rel_match = None, None parsed_link = preparsed_link.strip().split(';') found_link = parsed_link[0].strip() - found_rel = re.search('rel=\"([a-z-]+)\"', parsed_link[1]) + for link_prop in parsed_link[1:]: + if str(link_prop).startswith('rel="'): + rel_match = re.search('rel=\"(.*?)\"', link_prop) + elif str(link_prop).startswith('type="'): + type_match = re.search('type=\"(.*?)\"', link_prop) + if type_match: + found_type = type_match[1] + if rel_match: + found_rel = rel_match[1] + signposting_link_dict = {'url': found_link[1:-1], 'type': found_type, 'rel': found_rel} + if found_link: + self.fuji.signposting_header_links.append(signposting_link_dict) + ''' if found_rel: if self.fuji.signposting_header_links.get(found_rel[1]): self.fuji.signposting_header_links[found_rel[1]].append(found_link[1:-1]) else: self.fuji.signposting_header_links[found_rel[1]]=[found_link[1:-1]] + ''' #check if there is a cite-as signposting link if self.fuji.pid_scheme is None: - signposting_pid = self.fuji.signposting_header_links.get('cite-as') + signposting_pid_link = self.fuji.get_signposting_links('cite-as') + signposting_pid = signposting_pid_link[0].get('url') if signposting_pid: found_ids = idutils.detect_identifier_schemes(signposting_pid[0]) if len(found_ids) > 1: From 7da488d6689701df193c480faaed63d4b59fbcd0 Mon Sep 17 00:00:00 2001 From: huberrob Date: Thu, 17 Dec 2020 20:36:14 +0100 Subject: [PATCH 2/3] software version in output --- fuji_server/controllers/fair_check.py | 1 + fuji_server/controllers/fair_object_controller.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fuji_server/controllers/fair_check.py b/fuji_server/controllers/fair_check.py index dd2734a8..e42efd30 100644 --- a/fuji_server/controllers/fair_check.py +++ b/fuji_server/controllers/fair_check.py @@ -95,6 +95,7 @@ class FAIRCheck: FILES_LIMIT = None LOG_SUCCESS = 25 VALID_RESOURCE_TYPES = [] + FUJI_VERSION = 'v1.0.1' def __init__(self, uid, test_debug=False, oaipmh=None, use_datacite=True): uid_bytes = uid.encode('utf-8') diff --git a/fuji_server/controllers/fair_object_controller.py b/fuji_server/controllers/fair_object_controller.py index e913c2cf..c8c1b7d7 100644 --- a/fuji_server/controllers/fair_object_controller.py +++ b/fuji_server/controllers/fair_object_controller.py @@ -99,6 +99,6 @@ def assess_by_id(body): # noqa: E501 metric_spec = Preprocessor.metric_specification metric_version = os.path.basename(Preprocessor.METRIC_YML_PATH) totalmetrics = len(results) - final_response = FAIRResults(timestamp= timestmp, test_id= ft.test_id, metric_version=metric_version, metric_specification=metric_spec, total_metrics=totalmetrics, results=results) + final_response = FAIRResults(timestamp= timestmp, software_version=ft.FUJI_VERSION,test_id= ft.test_id, metric_version=metric_version, metric_specification=metric_spec, total_metrics=totalmetrics, results=results) return final_response From 2ba571b8f96bfe60e8a51d261a57605e8107c4d8 Mon Sep 17 00:00:00 2001 From: huberrob Date: Thu, 17 Dec 2020 20:36:45 +0100 Subject: [PATCH 3/3] signposting bug fix --- fuji_server/evaluators/fair_evaluator_persistent_identifier.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fuji_server/evaluators/fair_evaluator_persistent_identifier.py b/fuji_server/evaluators/fair_evaluator_persistent_identifier.py index 2626dd49..5a3d3885 100644 --- a/fuji_server/evaluators/fair_evaluator_persistent_identifier.py +++ b/fuji_server/evaluators/fair_evaluator_persistent_identifier.py @@ -87,7 +87,8 @@ def evaluate(self): #check if there is a cite-as signposting link if self.fuji.pid_scheme is None: signposting_pid_link = self.fuji.get_signposting_links('cite-as') - signposting_pid = signposting_pid_link[0].get('url') + if signposting_pid_link: + signposting_pid = signposting_pid_link[0].get('url') if signposting_pid: found_ids = idutils.detect_identifier_schemes(signposting_pid[0]) if len(found_ids) > 1: