todo: graph and cv generation

WCRP-CMIP · Jul 25, 2024 · fb8df9e · fb8df9e
1 parent ed33259
commit fb8df9e
Show file tree

Hide file tree

Showing 29 changed files with 4,046 additions and 720 deletions.
diff --git a/cmipld/cvs/CV.json b/cmipld/cvs/CV.json
diff --git a/cmipld/cvs/generate.py b/cmipld/cvs/generate.py
@@ -20,8 +20,8 @@ async def main():
     # latest = await CMIPFileUtils.load_latest(CMIPFileUtils)
 
     # manual read since we are in development. 
-    mip= await CMIPFileUtils.read_file_fs('/Users/daniel.ellis/WIPwork/mip-cmor-tables/JSONLD/scripts/compiled/graph_data.json')
-    cmip6plus =  await CMIPFileUtils.read_file_fs('/Users/daniel.ellis/WIPwork/CMIP6Plus_CVs/JSONLD/scripts/compiled/graph_data.json')
+    mip= await CMIPFileUtils.read_file_fs('/Users/daniel.ellis/WIPwork/mip-cmor-tables/compiled/graph_data.json')
+    cmip6plus =  await CMIPFileUtils.read_file_fs('/Users/daniel.ellis/WIPwork/CMIP6Plus_CVs/ compiled/graph_data.json')
 
     latest = sum([mip,cmip6plus],[])
 
@@ -64,13 +64,14 @@ async def main():
     ### CMIP6Plus #####
     ##################################
     # organisations
-    for key in 'organisations source-id native-nominal-resolution activity-id experiment-id sub-experiment-id'.split():
+    # native-nominal-resolution
+    for key in 'organisations source-id activity-id experiment-id sub-experiment-id'.split():
 
         # run the frame. 
         frame = get_frame('cmip6plus',key)
 
         # get results using frame
-        data = Frame(latest,frame).clean()
+        data = Frame(latest,frame)
 
         # any additional processing?
         add_new = await process('cmip6plus',key,data)
@@ -86,16 +87,11 @@ async def main():
     ### fix the file #####
     ##################################
 
-    def rename_keys(d):
-        # rename dictionary keys from '-' to '_'
-        if isinstance(d, dict): return {k.replace('-', '_'): rename_keys(v) for k, v in d.items()}
-        elif isinstance(d, list): return [rename_keys(item) for item in d]
-        else: return d
 
-    CV = OrderedDict(sorted((k, (v)) for k, v in rename_keys(CV).items()))
+    CV = OrderedDict(sorted((k, (v)) for k, v in CV.items()))
 
     with open('CV.json','w') as f:
-            json.dump(CV,f,indent=4)    
+            json.dump(dict(CV = CV),f,indent=4)    
 
 
 

diff --git a/cmipld/cvs/parse.py b/cmipld/cvs/parse.py
@@ -1,4 +1,6 @@
 
+from collections import OrderedDict
+
 def name_description(data,key='name',value='description'):
     return dict([(x[key],x[value]) for x in data])
 
@@ -27,52 +29,64 @@ def mip_cmor_tables_grid_label (data):
 ##################################
 
 def cmip6plus_organisations (data):
-    data = [d['organisation-id'] for d in data]
-    return name_description(data,key='cmip-acronym',value='name')
+    data = [d['organisation_id'] for d in data]
+    return name_description(data,key='cmip_acronym',value='name')
+
 def cmip6plus_descriptors (data):
     data.update(data['index'])
     del data['index']
+    data['DRS'] = data['drs']
+    del data['drs']
     return data
 
 def cmip6plus_source_id (data):
-    sid = {}
-    for source in data:
-
-        source['organisation-id'] = source['organisation-id'].get('cmip-acronym','')
+    sid = OrderedDict()
+    for source in sorted(data,key=lambda x: x['source_id']):
+        # ideally organisation 
+        source['institution_id'] = source['organisation_id'].get('cmip_acronym','')
 
         source['license'].update(source['license'].get('kind',{}))
 
         del source['license']['kind']
         del source['license']['conditions']
 
-        source['source'] = f"{source['name']} ({source['release-year']}): \n  "
+        source['source'] = f"{source['source_id']} ({source['release_year']}): \n  "
 
 
         #    combine the model-components
-        for i in source['model-component']:
+        for i in source['model_component']:
             try:
-                source['source'] += f"{i['name']} ({i['realm']})\n  "
+                source['source'] += f"{i['source_id']} ({i['realm']})\n  "
             except:
-                print('Missing',i, source['name'])
+                print('Missing',i, source['source_id'])
 
 
 
-        del source['model-component']
-
-        source['source_id'] = source['name']
+        del source['model_component']
 
         sid[source['source_id']] = source
 
     return sid
 
 def cmip6plus_native_nominal_resolution (data):
     print(data[0])
-    return list(set([f"{x['nominal-resolution'].get('value',x['nominal-resolution'])}{x['nominal-resolution'].get('unit',{}).get('si','km')}" for x in data]))
+    return list(set([f"{x['nominal_resolution'].get('value',x['nominal_resolution'])}{x['nominal_resolution'].get('unit',{}).get('si','km')}" for x in data]))
 
 
 def cmip6plus_sub_experiment_id (data):
-    return name_description(data,'sub-experiment-id','description')
+    return name_description(data,'sub_experiment_id','description')
+
 
+def cmip6plus_experiment_id (data):
+    eid = OrderedDict()
+    for e in sorted(data,key=lambda x: x['experiment_id']):
+        for i in ['additional_allowed','required']:
+            if isinstance(e['model_components'][i],str):
+
+                e['model_components'][i] = [e['model_components'][i]]
+        eid[e['experiment_id']] = e
+
+    return eid
 
 
 
@@ -83,9 +97,10 @@ def cmip6plus_sub_experiment_id (data):
 
 async def process(prefix,file,data=None):
     name = f'{prefix}_{file}'.replace('-','_')
+    # prepare for use. 
 
-    data.clean()
-    # print(list(local_globals)
+    data.clean_cv
+    data.print 
     if name in local_globals:
         data.data = local_globals[name](data.data) 
     else:

diff --git a/cmipld/file_io.py b/cmipld/file_io.py
@@ -3,6 +3,7 @@
 import json
 import base64
 import asyncio
+import sys
 
 
 '''
@@ -90,8 +91,6 @@ async def load_latest(self):
 
         return sum([await self.gh_read_file(*f) for f in latest],[])
 
-
-
 
 
     @staticmethod
@@ -102,3 +101,20 @@ def write_file(content, file):
             print(f"JSON data has been written to {file}")
         except Exception as e:
             print(f"Error writing to file: {e}")
+
+
+    @staticmethod
+    async def load(lddata:list):
+        read = []
+        for f in lddata:
+            if 'http' in f:
+                read.append(await CMIPFileUtils.read_file_url(f))
+            elif f in LatestFiles.entries:
+                read.append(await CMIPFileUtils.gh_read_file(*LatestFiles.entries[f]))
+            elif os.path.exists(f):
+                read.append(await CMIPFileUtils.read_file_fs(f))
+            else:
+                sys.exit(f"File {f} not found")
+
+
+        return sum(read,[])
diff --git a/cmipld/frame_ld/__init__.py b/cmipld/frame_ld/__init__.py
@@ -42,7 +42,10 @@ def __init__(self,source,frame,nograph=True):
         self.source = source
         self.frame = frame
 
-        if nograph:
+        # if "@embed" not in frame: # make sure each instance is embedded. 
+        #     self.frame["@embed"] = "@always"
+
+        if nograph: # usually used if framing an id directly
             self.data = self.graph_only(jsonld.frame(source, frame))
         else:
             self.data = jsonld.frame(source, frame)
@@ -74,6 +77,10 @@ def clean(self, process = None):
 
         self.end
         return self
+
+    @property
+    def clean_cv(self):
+        return self.clean(['missing','untag','lower'])
 
     # @staticmethod
     @property
@@ -109,6 +116,18 @@ def flatten(self):
         self.json_string = re.sub(r'{\s*"([^"]*?)":\s*"(.+)"\s*}', r'"\2"', self.json_string)
         return self
 
+    @property 
+    @direct
+    def lower(self):
+        self.json_string = re.sub(r'(?<=")([^"]*?)-(.*?)(?=":)', lambda m: m.group(0).replace('-', '_') , self.json_string)
+        return self
+
+    @property 
+    @direct
+    def missing(self):
+        self.json_string = re.sub(r'\{\s*"@id"\s*:\s*"([^"]+)"\s*\}', r'"Missing Link: \1"' , self.json_string)
+        return self
+
     @staticmethod
     def graph_only(json_object):
         return json_object.get('@graph', {})

diff --git a/cmipld/frame_ld/examples/cmip6plus/activity-id.json b/cmipld/frame_ld/examples/cmip6plus/activity-id.json
@@ -1,6 +1,11 @@
 {
+    "@context": {
+        "@vocab": "activity-id",
+        "@context": {
+            "@extend": true
+        }
+    },
     "@type": "mip:activity-id",
-        "activity-id:name": "",
-        "@explicit": true
-
+    "activity-id:name": "",
+    "@explicit": true
 }
diff --git a/cmipld/frame_ld/examples/cmip6plus/descriptors.json b/cmipld/frame_ld/examples/cmip6plus/descriptors.json
@@ -1,5 +1,6 @@
 {
     "@id": "cmip6plus:core/descriptors",
+    "@context":{"@extend":true},
     "@type": 
         "mip:core-descriptors"
     ,

diff --git a/cmipld/frame_ld/examples/cmip6plus/experiment-id.json b/cmipld/frame_ld/examples/cmip6plus/experiment-id.json
@@ -1,5 +1,15 @@
 {
+    "@context": {
+        "@vocab": "experiment-id",
+        "experiment-id:activity-id": {
+          "@extend": true
+        },
+        "experiment-id:sub-experiment-id": {
+          "@extend": true
+        }
+      },
     "@type": "mip:experiment-id",
+    "experiment-id": "",
     "experiment-id:activity-id": {
         "activity-id:name": {},
         "@explicit": true
@@ -13,5 +23,13 @@
             "source-type:name": "",
             "@explicit": true
         }
+    },
+    "experiment-id:sub-experiment-id": {
+        "sub-experiment-id:name": {},
+        "@explicit": true
+    },
+    "experiment-id:parent":{
+        "@embed":"@always"
     }
-}
+
+}
diff --git a/cmipld/frame_ld/examples/cmip6plus/organisations.json b/cmipld/frame_ld/examples/cmip6plus/organisations.json
@@ -1,4 +1,11 @@
 {
+    "@context": {
+    "@vocab": "institution",
+    "institution:location": {"@context":{
+      "@extend": true
+    }
+    }
+  },
     "@type": [
         "mip:source-id"
     ],

diff --git a/cmipld/frame_ld/examples/cmip6plus/source-id.json b/cmipld/frame_ld/examples/cmip6plus/source-id.json
@@ -1,34 +1,56 @@
 {
+    "@context": {
+        "@vocab": "source-id",
+        "source-id:activity-participation": {
+            "@context": {
+                "@extend": true
+            }
+        },
+        "source-id:model-component": {
+            "model-component:nominal-resolution": {
+                "model-native-nominal-resolution:cf:unit": {
+                    "@context": {
+                        "@extend": true
+                    }
+                }
+            },
+            "model-component:realm": {
+                "@context": {
+                    "@extend": true
+                }
+            }
+        },
+        "source-id:organisation-id": {
+            "@context": {
+                "@extend": true
+            }
+        }
+    },
     "@type": [
         "mip:source-id"
     ],
+    "source-id": "",
     "source-id:activity-participation": [
         {
             "activity-id:name": "",
             "@explicit": true
         }
     ],
-
     "source-id:license": {
-
         "@explicit": false
     },
-
-    "source-id:model-component": 
-        {
-            "model-component:name": "",
-            "model-component:realm": {
-                "realm:name": "",
-                "@explicit": true
-            },
-            "model_component:description": "",
-            "model_component:native-nominal-resolution": {
-                "native-nominal-resolution:value": "",
-                "@explicit": false,
-                "@expanded": true
-            },
+    "source-id:model-component": {
+        "model-component:name": "",
+        "model-component:realm": {
+            "realm:name": "",
             "@explicit": true
-
-        }
-
+        },
+        "model_component:description": "",
+        "model_component:native-nominal-resolution": {
+            "native-nominal-resolution:value": "",
+            "@explicit": false,
+            "@expanded": true
+        },
+        "@explicit": true
+    }
 }
diff --git a/cmipld/generate/__init__.py b/cmipld/generate/__init__.py
@@ -0,0 +1 @@
+