(in-package :cl-user)
;; Ontology "uri-resolution": a vocabulary for describing how the contents of
;; an information resource can be located starting from its URI, followed by a
;; handful of example individuals that exercise each retrieval method.
;; Each class/property/individual form may carry a string, which reads as its
;; human-readable description.
;; NOTE(review): :partial / :complete presumably follow the OWL abstract-syntax
;; meaning (necessary vs. necessary-and-sufficient conditions) -- confirm
;; against the define-ontology macro.
(define-ontology uri-resolution ()
;; ---- Core vocabulary -------------------------------------------------------
;; A retrieval method is itself declared to be a non-information resource.
(class !retrievalMethod
"A way of getting the contents of an information resource given it's uri. A retrieval method is a level of indirection intended to enable the various use cases that LSIDs and other persistent identifier schemes were envisioned for. Each information resource may be associated with one or more methods via the getMethod property. Absent any retrieval methods, it is understood that standardURIRetrieval should be attempted. This ontology does not put a preference order on methods, but applications may wish to. Each of the methods should be attempted and the first that succeeds returns the contents of the information resource."
:partial !notAnInformationResource)
;; getMethod: informationResource (domain) -> retrievalMethod (range).
(object-property !getMethod
"A property relating an information resource to a method for determining the location of that resource"
(domain !informationResource)
(range !retrievalMethod))
;; Every getMethod value of an information resource must be a retrievalMethod.
(class !informationResource
"An information resource is something where the thing itself can conceptually be retrieved as bits over a wire."
:partial (restriction !getMethod (all-values-from !retrievalMethod)))
;; Declared as the complement of informationResource.
(class !notAnInformationResource
"Not an information resource is anything that is not an information resource. Like a person, a kettle, an idea etc"
:complete (complement-of !informationResource))
;; Information resources split into unchanging ones and evolving ones; the
;; evolving class is described as "information resource AND NOT unchanging".
(class !unchangingInformationResource
"Information resources that never change. You may confidently cache them."
:partial !informationResource)
(class !evolvingInformationResource
"Information resources that may change. Subclasses could be defined to represent different behaviours"
:complete
(intersection-of !informationResource
(complement-of !unchangingInformationResource)))
;; ---- Retrieval method 1: use the URI unchanged ------------------------------
;; The class plus one named individual of it that resources can point at.
(class !standardURIRetrieval
"Use the URI of the individal as is"
:partial !retrievalMethod)
(individual !standardURIRetrievalMethod
"singleton"
(type !standardURIRetrieval))
;; ---- Retrieval method 2: regex match + replacement --------------------------
;; A transforming method carries exactly one matchingPattern and exactly one
;; replacementPattern.
(datatype-property !matchingPattern)
(datatype-property !replacementPattern)
(class !transformingURIRetrieval
"In a transformingURIRetrieval, the URI is matched against a regular expression with group captures. Then the groups can be substituted into a replacement pattern to form a url suitable for get-url. E.g. lsid http proxy"
:partial
(intersection-of
!retrievalMethod
(restriction !matchingPattern (cardinality 1))
(restriction !replacementPattern (cardinality 1))))
;; ---- Retrieval method 3: ask a SPARQL endpoint ------------------------------
;; A SPARQL method carries exactly one queryPattern, exactly one
;; URIVariableString (the placeholder inside the query pattern), and at least
;; one useSPARQLEndpoint value, all of which must be SPARQLEndpoints.
(datatype-property !queryPattern)
(datatype-property !URIVariableString)
(object-property !useSPARQLEndpoint)
(class !SPARQLEndpoint
"A sparql endpoint can accept sparql queries. The sparql query constructed is expected to have a single variable ?url. It is constructed by substituting the uri of the individual with the URIVariableString in the !queryPattern. This query is then run against the endpoint in the property !useSPARQLEndpoint"
:partial !notAnInformationResource)
(class !SPARQLRetrieval :partial
(intersection-of
!retrievalMethod
(restriction !queryPattern (cardinality 1))
(restriction !URIVariableString (cardinality 1))
(restriction !useSPARQLEndpoint (some-values-from !SPARQLEndpoint))
(restriction !useSPARQLEndpoint (all-values-from !SPARQLEndpoint))))
;; ---- Retrieval method 4: call a web service ---------------------------------
;; A web-service method points at exactly one WSDL document and names exactly
;; one method in it.
;; NOTE(review): get-information-resource-location, as visible in this file,
;; has no branch for webServiceRetrieval.
(class !WSDL "A wsdl document" :partial !informationResource)
(datatype-property !webServiceMethodName (range !xsd:string))
(object-property !definedByWsdl)
(class !webServiceRetrieval
"A web service can be used to retrieve the contents of an information resource. In this case you need to specify the WSDL, and a name of a method that takes a single parameter, the uri, and returns the contents"
:partial
(intersection-of
!retrievalMethod
(restriction !definedByWsdl (some-values-from !WSDL))
(restriction !definedByWsdl (cardinality 1))
(restriction !webServiceMethodName (cardinality 1))
))
;; ---- Formats ----------------------------------------------------------------
;; Formats are non-information resources; formatSpecification points from a
;; format to the information resource that specifies it, and resourceFormat is
;; used below to attach a format to a resource.
(class !informationResourceFormat
"Class of information resource formats. Each instance is like a mime type, but only specifies the format, not the type of content. Each format has a pointer to a specification"
:partial !notAnInformationResource)
(object-property !formatSpecification (range !informationResource))
(class !rdfFormat :partial !informationResourceFormat)
(object-property !resourceFormat)
;; Format individuals. Each nests an individual for its specification document
;; and records that document's own format (!html or !pdf).
;; NOTE(review): wherever a bare `!` is followed by a string (here and in the
;; examples further down), the URI that should follow `!` appears to be
;; missing -- possibly lost in extraction. Confirm against the original file.
;; NOTE(review): !html is used as a resourceFormat value but no !html
;; individual is declared in this form.
(individual !rdfxml (type !rdfFormat) (type !xmlBasedFormat)
(value !formatSpecification
(individual !
"http://www.w3.org/TR/rdf-syntax-grammar/"
(value !resourceFormat !html))))
(individual !ntriple (type !rdfFormat)
(value !formatSpecification
(individual !
"http://www.w3.org/2001/09/rdfprimer/rdf-primer-20020127"
(value !resourceFormat !html))))
(class !xmlBasedFormat :partial !informationResourceFormat)
(individual !xml "otherwise unspecified xml"
(type !xmlBasedFormat)
(value !formatSpecification
(individual !
"http://www.w3.org/XML/"
(value !resourceFormat !html))))
(individual !fasta (type !informationResourceFormat)
(value !formatSpecification
(individual !
"http://en.wikipedia.org/wiki/Fasta_format"
(value !resourceFormat !html))))
(individual !jpeg (type !informationResourceFormat)
(value !formatSpecification
(individual !
"http://www.w3.org/Graphics/JPEG/itu-t81.pdf"
(value !resourceFormat !pdf))))
(individual !pdf (type !informationResourceFormat)
(value !formatSpecification
(individual !
"http://partners.adobe.com/public/developer/en/pdf/PDFReference16.pdf"
(value !resourceFormat !pdf))))
;; ---- Examples ---------------------------------------------------------------
;; Example 1: SPARQL-based retrieval. Two endpoint individuals, one retrieval
;; method that uses them, a class of resources resolved through that method,
;; and two member resources (an XML record and a FASTA record).
(individual !
"http://neuroscientific.net/vitamin-source/endpoint-one/endpoint.php"
(type !SPARQLEndpoint))
(individual !
"http://neuroscientific.net/vitamin-source/endpoint-two/endpoint.php"
(type !SPARQLEndpoint))
;; "%%URI%%" is the placeholder that occurs in the query pattern.
;; NOTE(review): both useSPARQLEndpoint values are a bare `!`, and the query's
;; "PREFIX biozen:" has no IRI after it -- both look truncated; confirm.
(individual !vitaminSourceDemoMethod "Example of Matthias SPARQL endpoint for resolving URLs"
(type !SPARQLRetrieval)
(value !useSPARQLEndpoint !)
(value !useSPARQLEndpoint !)
(value !URIVariableString "%%URI%%")
(value !queryPattern "PREFIX biozen: SELECT ?url WHERE {%%URI%% biozen:download ?url . }"))
;; Any member of this class has getMethod = vitaminSourceDemoMethod.
(class !vitaminSourceDemoThing :partial
(intersection-of
!informationResource
(restriction !getMethod (has-value !vitaminSourceDemoMethod))))
(individual ! "XML record for transcript NM_013987, http://www.example.org/NM_013987_XML"
(type !vitaminSourceDemoThing)
(value !resourceFormat !xml))
(individual ! "fasta record for transcript NM_013987 http://www.example.org/NM_013987_FASTA"
(type !vitaminSourceDemoThing)
(value !resourceFormat !fasta))
;; Example 2: a JPEG retrieved with the standard "use the URI as is" method.
(individual ! "picture of Matthias"
(type !informationResource)
(value !resourceFormat !jpeg)
(value !getMethod !standardURIRetrievalMethod))
;; Example 3: transforming retrieval. The pattern "(.*)" captures the whole
;; URI as group 1, which replaces $1 in "http://lsid-info.org/$1".
(individual !lsidProxyLsidInfoOrg "An example of an LSID http resolver from http://www.w3.org/mid/OF1B337842.BEA002A5-ON85257205.0060CC92-85257205.007805D9@us.ibm.com, implemented as a transformingURIRetrieval"
(type !transformingURIRetrieval)
(value !matchingPattern "(.*)")
(value !replacementPattern "http://lsid-info.org/$1"))
(class !dnaSequence "Representations of DNA sequences" :partial !informationResource)
(class !geneDatabaseRecord "Records describing genes" :partial !informationResource)
;; Two LSID-identified resources resolved through the proxy method above: a
;; FASTA DNA sequence and an RDF/XML gene database record.
(individual !
"urn:lsid:ncbi.nlm.nih.gov.lsid.biopathways.org:genbank:30350027"
(type !dnaSequence)
(value !getMethod !lsidProxyLsidInfoOrg)
(value !resourceFormat !fasta))
(individual !
"urn:lsid:ncbi.nlm.nih.gov.lsid.biopathways.org:genbank:30350027?"
(type !geneDatabaseRecord)
(value !getMethod !lsidProxyLsidInfoOrg)
(value !resourceFormat !rdfxml))
)
(defun get-property-values (thing property &optional (kb *default-kb*))
  "Return the values of PROPERTY asserted for THING in KB.

Runs a one-triple SPARQL select (THING PROPERTY ?value) against KB and asks
for flattened results, i.e. the values themselves instead of one-element rows."
  (let ((triple-query `(:select (?value) () (,thing ,property ?value))))
    (sparql triple-query :kb kb :flatten t)))
(defun get-information-resource-location (thing &optional (kb *default-kb*))
  "Return a location from which the contents of THING can be fetched, or NIL.

THING is the URI of an information resource described in KB. Every value of
its getMethod property is tried in turn and the first method that produces a
location wins:

  SPARQLRetrieval          - substitute THING into the method's queryPattern,
                             run it against the method's endpoints and return
                             the value bound to the url variable.
  standardURIRetrieval     - THING itself is the location (returned as given,
                             not converted to a string).
  transformingURIRetrieval - match the URI of THING against the method's
                             matchingPattern and substitute the captured
                             groups for $1, $2, ... in its replacementPattern.

When THING has no getMethod value at all, THING itself is returned, following
the ontology's rule that standard URI retrieval applies absent any method.
NIL is returned when methods are listed but none of them yields a location.

All property lookups are made against KB, not against the default KB."
  (labels ((property-values (subject property)
             ;; Always query the KB the caller asked for.
             (get-property-values subject property kb))

           (sparql-location (method)
             ;; First endpoint whose first result row binds "url" wins, so
             ;; the remaining endpoints are not contacted needlessly.
             (let ((template-var (car (property-values method !URIVariableString)))
                   (query-template (car (property-values method !queryPattern))))
               (loop for endpoint in (property-values method !useSPARQLEndpoint)
                     for row = (car (ask-sparql-endpoint-template-query
                                     (uri-full endpoint) query-template template-var thing))
                     thereis (second (assoc "url" row :test 'equal)))))

           (transformed-location (method)
             (let ((matching-pattern (car (property-values method !matchingPattern)))
                   (replacement-pattern (car (property-values method !replacementPattern))))
               (when (and matching-pattern replacement-pattern)
                 ;; The number of capture groups is estimated by counting
                 ;; open parentheses, so escaped or non-capturing parentheses
                 ;; in the pattern are not supported.
                 (let* ((groups (count #\( matching-pattern))
                        (captures (car (apply 'all-matches (uri-full thing) matching-pattern
                                              (loop for i from 1 to groups collect i)))))
                   ;; A pattern with groups that captured nothing did not
                   ;; match: report failure rather than hand back a location
                   ;; that still contains literal $1, $2, ...
                   (unless (and (plusp groups) (null captures))
                     (loop with new = replacement-pattern
                           for el in captures
                           for i from 1 to groups
                           do (setq new (#"replace" new (format nil "$~a" i) el))
                           finally (return new))))))))
    (let ((methods (property-values thing !getMethod)))
      (if (null methods)
          ;; No method recorded: standard URI retrieval is implied.
          thing
          (loop for method in methods
                ;; Look at every direct type; their order is not something
                ;; the dispatch should depend on.
                for types = (direct-types method kb)
                thereis (cond ((member !SPARQLRetrieval types)
                               (sparql-location method))
                              ((member !standardURIRetrieval types)
                               thing)
                              ((member !transformingURIRetrieval types)
                               (transformed-location method))))))))
(defun ask-sparql-endpoint-template-query (endpoint query-template template-var value)
  "Run QUERY-TEMPLATE against the SPARQL endpoint at ENDPOINT and return its rows.

ENDPOINT is the endpoint address as a string of the form protocol://host/path.
Every occurrence of TEMPLATE-VAR in QUERY-TEMPLATE is replaced, literally, by
VALUE; when VALUE is a URI it is written in angle brackets, otherwise it is
inserted as given. The query is sent as the query= parameter of a GET request
and the XML response is parsed.

Returns one list per result element, each a list of (name value) pairs, one
pair per binding element in that result.

Signals an error when ENDPOINT cannot be split into protocol, host and path."
  (let* ((substitution (if (uri-p value)
                           (format nil "<~a>" (uri-full value))
                           value))
         ;; Literal replacement on purpose: the regex-based replaceAll would
         ;; interpret $ and \ inside the substituted URI (and regex
         ;; metacharacters inside TEMPLATE-VAR) rather than copy them.
         (query (#"replace" query-template template-var substitution))
         (parts (or (car (all-matches endpoint "(.*?)://(.*?)(/.*)" 1 2 3))
                    (error "Cannot split SPARQL endpoint ~s into protocol, host and path"
                           endpoint)))
         (request-url (destructuring-bind (protocol host path) parts
                        (#"toString" (clean-uri host path protocol ""
                                                (concatenate 'string "query=" query)))))
         (results (find-element-with-tag (xmls::parse (get-url request-url))
                                         "results")))
    (loop for result in (find-elements-with-tag results "result")
          collect (loop for binding in (find-elements-with-tag result "binding")
                        ;; Each binding element wraps one child element whose
                        ;; third item is the bound value's text.
                        collect (list (attribute-named binding "name")
                                      (third (third binding)))))))
;; Example calls, one per retrieval method; the expected result follows "=>".
;; ;; transformingURIRetrieval: the URI is rewritten through the LSID proxy pattern
;; (get-information-resource-location !)
;; => "http://lsid-info.org/urn:lsid:ncbi.nlm.nih.gov.lsid.biopathways.org:genbank:30350027?"
;; ;; standardURIRetrieval: the URI is used as is
;; (get-information-resource-location !)
;; => "http://neuroscientific.net/vitamin-source/fileserver/matthias.jpeg"
;; ;; SPARQLRetrieval: the location is looked up through a SPARQL endpoint
;; (get-information-resource-location ! )
;; => "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=7669537&rettype=xml"