-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathadsrdf.py
executable file
·166 lines (145 loc) · 6.14 KB
/
adsrdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#Layer for ADS-like things on top of pysesame and rdflib (or another backend). Currently only pysesame is supported.
from pysesame import connection, SPJSON, SPXML, SPCXML
#from bib2rdf2 import record_as_rdf, InvalidBibcode
import namespaces, types, StringIO
from rdflib import plugin, URIRef, BNode
from rdflib import ConjunctiveGraph
# Media types used when talking to the triple store.
# NOTE: SPJSON, SPXML and SPCXML are also imported from pysesame above; the
# assignments below rebind those names locally.
SPATXT = 'text/boolean'
SPCN3 = 'text/rdf+n3'
SPCXML = 'application/rdf+xml'
SPXML = 'application/sparql-results+xml'
SPJSON = 'application/sparql-results+json'

# Maps the one-letter statement-query keys ('s', 'p', 'o', 'c') to longer
# names. Not referenced elsewhere in this part of the file.
SPOC = dict(
    s='subj',
    o='obj',
    p='pred',
    c='context',
)
#This module must:
"""
Using this module, we must be able to
(a) set up a connection with proper namespaces
(b) add a file under a given context to the repository
(c) ntriple-encode namespace-prefixed names for contexts and other queries.
This will require wrapping and normalizing some things in the connection class.
Also cleanly separate encoding from one file to the other: the pysesame connector
needs to be general too and know nothing about namespaces.
(d) provide an interface easily usable by a script, by a load script (i.e. one that manages transactions),
and by a long-running web server
(e) provide Python dictionary interfaces to both RDF and JSON output using rdflib
"""
#You don't want to use PUT directly on a context for just one thing, as that would replace
#the elements in that context each time. Rather, delete the data in that context, figure out which files
#were in that context, and repost them. This is under the assumption that multiple files or
#transactions go into the population of a context.
#Create as a delegate class with added translation functionality.
#TODO: We should create a cache for methods like getDataBySP, which would call getDataByS and
#then cache the results so we don't hit the triplestore again and again.
class ADSConnection:
    """Delegate around a pysesame ``connection`` that adds namespace handling.

    Callers pass prefixed names (e.g. ``'rdf:type'``); this class encodes
    them with ``namespaces.n3encode`` before handing them to the underlying
    triple store connection, which is kept in ``self.tsc``.
    """

    def __init__(self, sesameURL, repository):
        """Open ``repository`` at ``sesameURL`` and register known namespaces.

        Every prefix in ``namespaces.namespace_dict`` is registered on the
        connection through ``addnamespace``.
        """
        c = connection(sesameURL)
        c.use_repository(repository)
        for prefix, uri in namespaces.namespace_dict.items():
            c.addnamespace(prefix, str(uri))
        # tsc is the triple store connection. We need to first write this,
        # then translate to a driver architecture.
        self.tsc = c

    def _contextQuery(self, context=None):
        """Return a new query dict holding the encoded context, if one is given."""
        qdict = {}
        if context:
            qdict['c'] = namespaces.n3encode(context)
        return qdict

    def _termsFromRDF(self, data, index):
        """Parse ``data`` with rdflib and return one term of every triple.

        ``index`` selects the position inside each triple (0 is the subject,
        2 is the object). Each term is returned UTF-8 encoded.
        """
        bg = ConjunctiveGraph()
        namespaces.bindgraph(bg)
        res = bg.parse(StringIO.StringIO(data))
        return [str(trip[index].encode('utf-8')) for trip in res]

    def makeQuery(self, query, type=SPJSON):
        """Post ``query`` through the connection and return the response.

        ``type`` is the requested result media type. The parameter name
        shadows the builtin but is kept so keyword callers keep working.
        """
        return self.tsc.querypost(query, type)

    def deleteData(self, context=None):
        """Delete the data in ``context``, or call ``deletedata()`` with no
        argument when no context is given."""
        if context:
            self.tsc.deletedata(namespaces.n3encode(context))
        else:
            self.tsc.deletedata()

    def getDataInContext(self, context=None):
        """Return whatever ``get_in_context`` yields for ``context``.

        Without a context the underlying call is made with no argument.
        """
        if context:
            return self.tsc.get_in_context(namespaces.n3encode(context))
        return self.tsc.get_in_context()

    def getDataBySPO(self, thingy, thingytype="s", context=None):
        """Query statements matching a single term.

        ``thingytype`` is the query key under which the encoded ``thingy``
        is sent; it defaults to ``"s"``. Returns the raw result of
        ``query_statements``.
        """
        qdict = self._contextQuery(context)
        qdict[thingytype] = namespaces.n3encode(thingy)
        return self.tsc.query_statements(qdict)

    def getDataByDict(self, thedict, context=None):
        """Query statements matching every key/value pair in ``thedict``.

        Values are encoded with ``namespaces.n3encode``. A list value is
        encoded element by element and sent as a list, but the original
        author notes that Sesame does not support this, so passing two
        subjects (for example) is not expected to work.
        Returns the raw result of ``query_statements``.
        """
        qdict = self._contextQuery(context)
        for key, value in thedict.items():
            # isinstance also accepts list subclasses, unlike an exact
            # type comparison.
            if isinstance(value, list):  # NOT SUPPORTED BY SESAME
                qdict[key] = [namespaces.n3encode(member) for member in value]
            else:
                qdict[key] = namespaces.n3encode(value)
        return self.tsc.query_statements(qdict)

    def getDataByType(self, thetype, context=None):
        """Return the subjects of all statements ``?s rdf:type thetype``.

        The result is a list of UTF-8 encoded strings, one per matching
        triple.
        """
        thedict = {'p': 'rdf:type', 'o': thetype}
        data = self.getDataByDict(thedict, context)
        return self._termsFromRDF(data, 0)

    def getDataBySP(self, thingy, propthingy, context=None):
        """Return the objects of all statements with the given subject and
        predicate.

        A subject containing ``':'`` is treated as a prefixed name and
        encoded; any other subject is sent to the store verbatim.
        The result is a list of UTF-8 encoded strings.
        """
        qdict = self._contextQuery(context)
        # TODO: the unencoded branch (presumably for blank-node ids, to be
        # confirmed) was flagged by the original author as very fragile and
        # due for replacement.
        if ':' in thingy:
            qdict['s'] = namespaces.n3encode(thingy)
        else:
            qdict['s'] = thingy
        qdict['p'] = namespaces.n3encode(propthingy)
        data = self.tsc.query_statements(qdict)
        return self._termsFromRDF(data, 2)

    def addFile(self, thefile, context=None):
        """Post ``thefile`` to the repository, under ``context`` if given."""
        if context:
            self.tsc.postfile(thefile, namespaces.n3encode(context))
        else:
            self.tsc.postfile(thefile)
if __name__ == "__main__":
    # Manual check against a local Sesame server: count the subjects typed
    # adsobsv:Datum twice, so that a deleteData call can be enabled in
    # between to compare the counts before and after.
    import sys
    conn = ADSConnection('http://localhost:8081/openrdf-sesame/', 'testads3')

    # Other calls that have been tried from here:
    #   result=conn.getDataBySPO('uri_bib:1998MNRAS.293..306Q')
    #   bibcode='1998MNRAS.293..306Q'
    #   bibcodeuri='uri_bib:'+bibcode
    #   result1=conn.getDataBySP(bibcodeuri, 'adsbib:keywordConcept')
    #   result2=conn.getDataBySP(bibcodeuri, 'adsbase:title')
    #   result3=conn.getDataBySP(bibcodeuri, 'pav:authoredBy')
    #   print "<<\n",bibcodeuri, result1, result2, result3, "\n>>"
    #   thedict={
    #       's': 'uri_bib:1998MNRAS.293..306Q',
    #       'p': ['adsbib:keywordConcept', 'adsbase:title'] #doesn't work
    #   }
    #   result=conn.getDataByDict(thedict)
    #   print result

    datum_uris = conn.getDataByType('adsobsv:Datum')
    print(len(datum_uris))

    # Example context:
    # <http://ads.harvard.edu/sem/context#a40e27dc-8abb-4698-bd9b-415b60d3cfb4-chandra/loadfiles-obsv.py-0.1>
    #conn.deleteData('uri_context:a40e27dc-8abb-4698-bd9b-415b60d3cfb4-chandra/loadfiles-obsv.py-0.1')
    #conn.getDataInContext('uri_context:a40e27dc-8abb-4698-bd9b-415b60d3cfb4-chandra/loadfiles-obsv.py-0.1')

    datum_uris = conn.getDataByType('adsobsv:Datum')
    print(len(datum_uris))