(in-package :cl-user)

(defun kegg-generic-formula-match ()
  "Wouldn't be matched by KEGG because not definedBy.
Returns the (mol1 mol2) pairs of small molecules that share both a KEGG
classification and a chemical formula, with the pairs already related by
owl:sameAs (see SAMES-SPARQL) removed."
  (set-difference
   ;; Candidate pairs: same KEGG classification and same formula.
   (sparql '(:select (?mol1 ?mol2) (:distinct t)
             (?mol1 !rdf:type !bug:smallMolecule)
             (?mol1 !bug:classifiedByKEGG ?kegg)
             (?mol2 !bug:classifiedByKEGG ?kegg)
             (?mol1 !bug:hasChemicalFormula ?formula)
             (?mol2 !bug:hasChemicalFormula ?formula)
             (:filter (and (is-canonical ?mol1)
                           (not (equal ?mol1 ?mol2)))))
           :use-reasoner :jena)
   ;; Pairs that are already asserted/inferred to be the same.
   (sames-sparql)
   :test 'equalp))

(defun sames-sparql ()
  "Which pairs of small molecules are considered to be the same?
Returns the (mol1 mol2) rows of the query for ?mol2 owl:sameAs ?mol1,
where ?mol1 passes the is-canonical filter and differs from ?mol2."
  (sparql '(:select (?mol1 ?mol2) ()
            (?mol1 !rdf:type !bug:smallMolecule)
            (?mol2 !owl:sameAs ?mol1)
            (:filter (and (is-canonical ?mol1)
                          (not (equal ?mol1 ?mol2)))))
          :use-reasoner :jena))

(defun fuzzy-name-matches ()
  "Group small molecules whose names agree after normalisation.
A name is normalised by deleting every character outside [a-zA-Z0-9] and
downcasing the rest.  Molecules that already take part in a pair returned
by SAMES-SPARQL are ignored.  Returns a list of groups; each group is a
list of two or more distinct molecules sharing a normalised name."
  (let ((has-same (make-hash-table :test 'equal))
        (name2mol (make-hash-table :test 'equal)))
    ;; Remember every molecule that already has a known equivalent.
    (loop for (this that) in (sames-sparql)
          do (setf (gethash this has-same) that
                   (gethash that has-same) this))
    (loop for (mol name) in (sparql '(:select (?mol1 ?name) (:distinct t)
                                      (?mol1 !rdf:type !bug:smallMolecule)
                                      (?mol1 !bug:hasName ?name))
                                    :use-reasoner :jena)
          for fuzzed = (string-downcase
                        (#"replaceAll" name "[^a-zA-Z0-9]" ""))
          do (unless (or (gethash mol has-same)
                         ;; A name with no alphanumerics normalises to ""
                         ;; and carries no information to match on.
                         (zerop (length fuzzed)))
               ;; PUSHNEW: a molecule with several names that normalise to
               ;; the same key must only count once, otherwise it would be
               ;; reported as matching itself.
               (pushnew mol (gethash fuzzed name2mol) :test 'equal)))
    (let ((them '()))
      (maphash (lambda (fuzzed mols)
                 (declare (ignore fuzzed))
                 (when (> (length mols) 1)
                   (push mols them)))
               name2mol)
      them)))

(defun sameindividual-owl-for-fuzzy-name-matches (&optional (stream t))
  "Write one SameIndividual(<uri> <uri> ...) line to STREAM for each group
returned by FUZZY-NAME-MATCHES.  STREAM defaults to T (standard output).
Each molecule is rendered through URI-FULL."
  (loop for set in (fuzzy-name-matches)
        do (format stream "SameIndividual(~{<~a>~^ ~})~%"
                   (mapcar 'uri-full set))))

(defun generate-sames-by-name ()
  "Write the SameIndividual axioms for the fuzzy name matches to the file
\"bug:sames-by-name.absowl\", replacing any existing file."
  (with-open-file (f "bug:sames-by-name.absowl"
                     :direction :output
                     :if-exists :supersede)
    (sameindividual-owl-for-fuzzy-name-matches f)))