Skip to content

Commit

Permalink
Merge pull request #85 from oeg-upm/dev
Browse files Browse the repository at this point in the history
adding translation for in-memory RML
  • Loading branch information
dachafra authored Oct 29, 2024
2 parents 7421fc9 + cbb9c9b commit 9556953
Show file tree
Hide file tree
Showing 30 changed files with 538 additions and 119 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.1.5
1.2
6 changes: 5 additions & 1 deletion src/yatter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ def translate(yarrrml_data, mapping_format=RML_URI):
rml_mapping.extend(generate_database_connections(yarrrml_data, list_initial_sources))
rml_mapping.extend(add_logical_targets(yarrrml_data))
rml_mapping.extend(add_functions(yarrrml_data))
external_refs = []
try:
mappings, mapping_format = get_non_asserted_mappings(yarrrml_data, mapping_format)
for mapping in yarrrml_data.get(YARRRML_MAPPINGS):
if mapping_format == R2RML_URI:
source_list = add_table(yarrrml_data, mapping, list_initial_sources)
else:
source_list = add_source(yarrrml_data, mapping, list_initial_sources)
source_list, external_refs = add_source(yarrrml_data, mapping, list_initial_sources)
subject_list = add_subject(yarrrml_data, mapping, mapping_format)
pred = add_predicate_object_maps(yarrrml_data, mapping, mapping_format)
it = 0
Expand All @@ -37,6 +38,9 @@ def translate(yarrrml_data, mapping_format=RML_URI):
rml_mapping[len(rml_mapping) - 1] = rml_mapping[len(rml_mapping) - 1][:-2]
rml_mapping.append(".\n\n\n")
it = it + 1
external_refs = list(dict.fromkeys(external_refs))
for ref in external_refs:
rml_mapping.append(ref)

logger.info("RML content is created!")
rml_mapping_string = "".join(rml_mapping)
Expand Down
18 changes: 17 additions & 1 deletion src/yatter/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
VOID_URI = 'http://rdfs.org/ns/void#'
FNML_URI = 'http://semweb.mmlab.be/ns/fnml#'
GREL_URI = 'http://users.ugent.be/~bjdmeest/function/grel.ttl#'
SD_URI = 'https://w3id.org/okn/o/sd/'

RML_PREFIX = '@prefix'
RML_BASE = '@base'
RML_LOGICAL_SOURCE_CLASS = 'rml:LogicalSource'
RML_LOGICAL_SOURCE = 'rml:logicalSource'
RML_SOURCE = 'rml:source'
RML_REFERENCE_FORMULATION = 'rml:referenceFormulation'
RML_REFERENCE_FORMULATION_CLASS = 'rml:ReferenceFormulation'
RML_ITERATOR = 'rml:iterator'
RML_REFERENCE = 'rml:reference'
RML_LANGUAGE_MAP = 'rml:languageMap'
Expand Down Expand Up @@ -103,7 +105,16 @@
D2RQ_USER = 'd2rq:username'
D2RQ_PASS = 'd2rq:password'


##############################################################################
############################# SD CONSTANTS ###########################
##############################################################################
# Prefixed terms written into the RML output when an in-memory source is
# serialized (see add_in_memory_source in source.py). The `sd:` terms live
# under SD_URI; `kg4di:` is a separate prefix.
# Type of the blank node emitted as the value of rml:source.
SD_DATASET_SPEC = 'sd:DatasetSpecification'
# Carries the access value with its `$(`...`)` wrapper removed.
SD_NAME = 'sd:name'
# Opens the nested node that holds the software specification, if any.
SD_HAS_DATA_TRANSFORMATION = 'sd:hasDataTransformation'
# Literal taken from the YARRRML `softwareRequirements` key.
SD_HAS_SOFTWARE_REQUIREMENTS = 'sd:hasSoftwareRequirements'
# Opens the nested node that holds the programming language.
SD_HAS_SOURCE_CODE= 'sd:hasSourceCode'
# Literal taken from the YARRRML `programmingLanguage` key.
SD_PROGRAMMING_LANGUAGE = 'sd:programmingLanguage'
# Links the standalone reference-formulation statement to its structure definer.
KG4DI_DEFINED_BY = 'kg4di:definedBy'
##############################################################################
############################# YARRRML CONSTANTS ###########################
##############################################################################
Expand All @@ -122,6 +133,11 @@
YARRRML_USERNAME = 'username'
YARRRML_PASSWORD = 'password'

YARRRML_STRUCTURE_DEFINER = 'structureDefiner'
YARRRML_SOFTWARE_SPECIFICATION = 'softwareSpecification'
YARRRML_PROGRAMMING_LANGUAGE = 'programmingLanguage'
YARRRML_SOFTWARE_REQUIREMENTS = 'softwareRequirements'

YARRRML_MAPPINGS = 'mappings' # used for mappings in conditions and mappings main key
YARRRML_MAPPING = 'mapping'
YARRRRL_MAPPINGS_SHORTCUT = 'm'
Expand Down
2 changes: 1 addition & 1 deletion src/yatter/predicateobject.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def ref_mapping(data, mapping, om, yarrrml_key, ref_type_property, mapping_forma
else:
if mapping_format == STAR_URI:
object = STAR_OBJECT
source_list = add_source(data, mapping_join, list_initial_sources)
source_list, external_refs = add_source(data, mapping_join, list_initial_sources)

number_joins_rml = len(subject_list) * len(source_list)
for i in range(number_joins_rml):
Expand Down
63 changes: 61 additions & 2 deletions src/yatter/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def add_source(data, mapping, list_initial_sources):
source_template = "\t" + RML_LOGICAL_SOURCE + " [\n\t\ta " + RML_LOGICAL_SOURCE_CLASS + \
";\n\t\t" + RML_SOURCE + " "
final_list = []
external_references_list = []
sources = get_sources(data, mapping)
for source in sources:
db_identifier = mapping
Expand All @@ -40,13 +41,24 @@ def add_source(data, mapping, list_initial_sources):
if YARRRML_ACCESS in source:
if YARRRML_QUERY in source:
final_list.append(source_template + database_source(mapping, source, db_identifier))
elif YARRRML_STRUCTURE_DEFINER in source:
source, external_references = add_in_memory_source(mapping,source)
final_list.append(source_template + source)
if external_references is not None:
external_references_list.append(external_references)
else:
final_list.append(source_template + add_source_full(mapping, source))
elif type(source) is list:
final_list.append(source_template + add_source_simplified(mapping, source))
if "$(" in source[0]:
source, external_references = add_in_memory_source(mapping, source)
final_list.append(source_template + source)
if external_references is not None:
external_references_list.append(external_references)
else:
final_list.append(source_template + add_source_simplified(mapping, source))
else:
raise Exception("ERROR: source " + source + " in mapping " + mapping + " not valid")
return final_list
return final_list, external_references_list


def add_table(data, mapping, list_initial_sources):
Expand Down Expand Up @@ -105,6 +117,53 @@ def add_source_simplified(mapping, source):
+ source[1] + "\";\n\t];\n"
return source_rdf

def add_in_memory_source(mapping, source):
    """Serialize an in-memory source as the value of ``rml:source``.

    ``source`` is either the shorthand list form, which is first expanded with
    ``extend_in_memory``, or a dict keyed by the YARRRML source keys. The
    ``mapping`` argument is accepted but not read by this function.

    Returns a ``(source_rdf, external_reference)`` tuple. ``source_rdf`` holds
    the dataset-specification blank node, then the optional iterator and
    reference formulation, then the bracket closing the logical source.
    ``external_reference`` is the standalone statement describing the
    reference formulation, or ``None`` when the source declares none.
    """
    if type(source) is list:
        source = extend_in_memory(source)

    # Only the bare variable name is emitted, without the $( ) wrapper.
    dataset_name = str(source.get(YARRRML_ACCESS)).replace("$(", "").replace(")", "")
    pieces = [
        "[\n\t\t\ta " + SD_DATASET_SPEC + ";\n\t\t\t",
        SD_NAME + " \"" + dataset_name + "\";\n",
    ]

    if YARRRML_SOFTWARE_SPECIFICATION in source:
        software = source.get(YARRRML_SOFTWARE_SPECIFICATION)
        pieces.append("\t\t\t" + SD_HAS_DATA_TRANSFORMATION + "[\n\t\t\t\t")

        if YARRRML_SOFTWARE_REQUIREMENTS in software:
            requirements = str(software[YARRRML_SOFTWARE_REQUIREMENTS])
            pieces.append(SD_HAS_SOFTWARE_REQUIREMENTS + " \"" + requirements + "\";\n\t\t\t\t")

        if YARRRML_PROGRAMMING_LANGUAGE in software:
            language = str(software[YARRRML_PROGRAMMING_LANGUAGE])
            pieces.append(SD_HAS_SOURCE_CODE + "[\n\t\t\t\t\t" + SD_PROGRAMMING_LANGUAGE + " \"" + language + "\";")
            pieces.append("\n\t\t\t\t];\n")

        # Close the data-transformation node.
        pieces.append("\t\t\t];\n")
    # Close the dataset-specification node.
    pieces.append("\t\t];\n")

    if YARRRML_ITERATOR in source:
        pieces.append("\t\t" + RML_ITERATOR + " \"" + source.get(YARRRML_ITERATOR) + "\";\n")

    external_reference = None
    if YARRRML_REFERENCE_FORMULATION in source:
        formulation = str(source.get(YARRRML_REFERENCE_FORMULATION))
        pieces.append("\t\t" + RML_REFERENCE_FORMULATION + " ql:" + formulation + ";\n")
        # The formulation is also described in a top-level statement that the
        # caller receives separately from the logical source.
        external_reference = (
            "ql:" + formulation + " a " + RML_REFERENCE_FORMULATION_CLASS + ";\n"
            + "\t" + KG4DI_DEFINED_BY + " \"" + source.get(YARRRML_STRUCTURE_DEFINER) + "\"."
        )
    # Close the logical source.
    pieces.append("\t];\n")

    return "".join(pieces), external_reference

def extend_in_memory(source):
    """Expand the shorthand list form of an in-memory source into a dict.

    The first element must look like ``<access>~<definer>-<formulation>``,
    for example ``$(output_dataframe)~Pandas-DataFrame``. When the list has
    exactly two elements, the second one is used as the iterator.

    Returns a dict with the keys ``access``, ``structureDefiner`` and
    ``referenceFormulation``, plus ``iterator`` when one was given.

    Raises IndexError when the list is empty or its first element lacks the
    ``~`` or ``-`` separator. IndexError is what the unchecked indexing raised
    before, so existing handlers keep working; only the message is new.
    """
    expected = "access~structureDefiner-referenceFormulation"
    if not source:
        raise IndexError("ERROR: in-memory source is empty, expected [" + expected + "]")

    shorthand = source[0]
    features = shorthand.split("~")
    if len(features) < 2:
        raise IndexError("ERROR: in-memory source " + shorthand
                         + " not valid, missing '~' (expected " + expected + ")")

    # Split the structure part once rather than once per extracted field.
    structure = features[1].split("-")
    if len(structure) < 2:
        raise IndexError("ERROR: in-memory source " + shorthand
                         + " not valid, missing '-' (expected " + expected + ")")

    extended_source = {
        "access": features[0],
        "structureDefiner": structure[0],
        "referenceFormulation": structure[1],
    }
    if len(source) == 2:
        extended_source["iterator"] = source[1]
    return extended_source

def add_source_full(mapping, source):
source_rdf = ""
Expand Down
46 changes: 46 additions & 0 deletions test/rml-in-memory/IMTC001/mapping.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
@prefix insta: <http://instagram.com/data/>.
@prefix rr: <http://www.w3.org/ns/r2rml#>.
@prefix rml: <http://semweb.mmlab.be/ns/rml#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
@prefix sd: <https://w3id.org/okn/o/sd/>.
@prefix kg4di: <https://w3id.org/kg4di/>.
@prefix ql: <http://semweb.mmlab.be/ns/ql#>.
@base <http://example.com/ns#>.


<df_map_0> a rr:TriplesMap;

rml:logicalSource [
a rml:LogicalSource;
rml:source [
a sd:DatasetSpecification;
sd:name "output_dataframe";
sd:hasDataTransformation[
sd:hasSoftwareRequirements "pandas>=1.1.0";
sd:hasSourceCode[
sd:programmingLanguage "Python3.9";
];
];
];
rml:referenceFormulation ql:DataFrame;
];
rr:subjectMap [
a rr:SubjectMap;
rr:template "http://instagram.com/data/user{Id}";
];
rr:predicateObjectMap [
rr:predicateMap [
a rr:PredicateMap;
rr:constant rdf:type;
];
rr:objectMap [
a rr:ObjectMap;
rr:constant insta:User;
];
].


ql:DataFrame a rml:ReferenceFormulation;
kg4di:definedBy "Pandas".
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ mappings:
sources:
access: $(output_dataframe)
referenceFormulation: DataFrame
structureDefiner: Pandas
softwareSpecification:
programmingLanguage: Python3.9
softwareRequirements: pandas>=1.1.0
s: http://instagram.com/data/user{Id}
structureDefiner: Pandas
softwareSpecification:
programmingLanguage: Python3.9
softwareRequirements: pandas>=1.1.0
s: http://instagram.com/data/user$(Id)
po:
- [a, insta:User]
43 changes: 0 additions & 43 deletions test/rml-in-memory/IMTC001/mappings.rml.ttl

This file was deleted.

26 changes: 26 additions & 0 deletions test/rml-in-memory/IMTC001/test_imtc001.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
__author__ = "Ioannis Dasoulas"
__credits__ = ["Ioannis Dasoulas"]

__license__ = "Apache-2.0"
__maintainer__ = "David Chaves-Fraga"
__email__ = "[email protected]"


import os
from ruamel.yaml import YAML
import yatter
from rdflib.graph import Graph
from rdflib import compare
RML_URI = 'http://semweb.mmlab.be/ns/rml#'


def test_imtc001():
    """Check that translating the sibling mapping.yml yields the expected RML.

    The YARRRML file next to this module is translated with ``yatter`` and the
    resulting graph must be isomorphic to the one parsed from mapping.ttl.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    expected_mapping = Graph()
    expected_mapping.parse(os.path.join(test_dir, 'mapping.ttl'), format="ttl")

    yaml = YAML(typ='safe', pure=True)
    # Use a context manager so the YAML file handle is closed deterministically
    # instead of being left open for the rest of the test run.
    with open(os.path.join(test_dir, 'mapping.yml')) as mapping_file:
        yarrrml_data = yaml.load(mapping_file)

    translated_mapping = Graph()
    translated_mapping.parse(data=yatter.translate(yarrrml_data, mapping_format=RML_URI), format="ttl")

    assert compare.isomorphic(expected_mapping, translated_mapping)
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
@prefix sd: <https://w3id.org/okn/o/sd/>.
@prefix kg4di: <https://w3id.org/kg4di/definedBy>.
@prefix kg4di: <https://w3id.org/kg4di/>.
@prefix ql: <http://semweb.mmlab.be/ns/ql#>.
@base <http://example.com/ns#>.


<df_map> a rr:TriplesMap;
<df_map_0> a rr:TriplesMap;

rml:logicalSource [
a rml:LogicalSource;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ mappings:
sources:
access: $(output_dataframe)
referenceFormulation: DataFrame
structureDefiner: Pandas
s: http://instagram.com/data/user{Id}
structureDefiner: Pandas
s: http://instagram.com/data/user$(Id)
po:
- [a, insta:User]
- [a, insta:User]
26 changes: 26 additions & 0 deletions test/rml-in-memory/IMTC002/test_imtc002.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
__author__ = "Ioannis Dasoulas"
__credits__ = ["Ioannis Dasoulas"]

__license__ = "Apache-2.0"
__maintainer__ = "David Chaves-Fraga"
__email__ = "[email protected]"


import os
from ruamel.yaml import YAML
import yatter
from rdflib.graph import Graph
from rdflib import compare
RML_URI = 'http://semweb.mmlab.be/ns/rml#'


def test_imtc002():
    """Check that translating the sibling mapping.yml yields the expected RML.

    The YARRRML file next to this module is translated with ``yatter`` and the
    resulting graph must be isomorphic to the one parsed from mapping.ttl.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    expected_mapping = Graph()
    expected_mapping.parse(os.path.join(test_dir, 'mapping.ttl'), format="ttl")

    yaml = YAML(typ='safe', pure=True)
    # Use a context manager so the YAML file handle is closed deterministically
    # instead of being left open for the rest of the test run.
    with open(os.path.join(test_dir, 'mapping.yml')) as mapping_file:
        yarrrml_data = yaml.load(mapping_file)

    translated_mapping = Graph()
    translated_mapping.parse(data=yatter.translate(yarrrml_data, mapping_format=RML_URI), format="ttl")

    assert compare.isomorphic(expected_mapping, translated_mapping)
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix xsd: <http://www.w3.org/2001/XMLSchema#>.
@prefix sd: <https://w3id.org/okn/o/sd/>.
@prefix kg4di: <https://w3id.org/kg4di/definedBy>.
@prefix kg4di: <https://w3id.org/kg4di/>.
@prefix ql: <http://semweb.mmlab.be/ns/ql#>.
@base <http://example.com/ns#>.


<df_map> a rr:TriplesMap;
<df_map_0> a rr:TriplesMap;

rml:logicalSource [
a rml:LogicalSource;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ mappings:
df_map:
sources:
- [$(output_dataframe)~Pandas-DataFrame]
s: http://instagram.com/data/user{Id}
s: http://instagram.com/data/user$(Id)
po:
- [a, insta:User]
Loading

0 comments on commit 9556953

Please sign in to comment.