(in-package :common-lisp-user)

;; Characters that ENCODE-NAME replaces with #\_ before a name is written
;; inside a double-quoted literal.
(defparameter *nasty* '(#\"))

;; Location (a logical pathname) that spreadsheets, patches, logs and
;; generated files are resolved against.
(defvar *working-directory* "bug:")

(defparameter *ijr904-compound-spreadsheet* "FBA_model-1-12-06-compounds.txt")

(defvar *ecocyc-compound-spreadsheet* "EcoCyc-compounds-2006-02-16.txt")

;; Stream that patch/KEGG warnings are written to, or NIL for none.
(defvar *log-io* nil)

(defun read-patches (path)
  "Read the patches file at PATH into an EQUALP hash table and return it.
The first line is a header and is skipped.  Lines starting with #\\# are
comments.  The remaining lines are consumed in pairs: an original
spreadsheet record followed by its replacement.  A missing file yields an
empty table."
  (let ((patches (make-hash-table :test 'equalp)))
    (when (probe-file path)
      (flet ((read-line-skip-comments (f)
               ;; Next non-comment line, or :eof.  The length check keeps a
               ;; blank line from signalling in (char line 0).
               (loop for line = (read-line f nil :eof)
                     until (eq line :eof)
                     while (and (plusp (length line))
                                (char= (char line 0) #\#))
                     finally (return line))))
        (with-open-file (f path)
          (read-line f nil :eof)        ; skip headers
          (loop for original = (read-line-skip-comments f)
                for new = (read-line-skip-comments f)
                ;; Stop on an unpaired trailing line as well, so that :eof
                ;; is never stored as a replacement record.
                until (or (eq original :eof) (eq new :eof))
                do (setf (gethash original patches) new)))))
    patches))

(defun maybe-patch (record patches)
  "Return the replacement stored for RECORD in PATCHES, or RECORD itself
when there is none.  A replacement is reported on *LOG-IO* when that is
non-NIL (first 10 characters of each side)."
  (let ((replacement (gethash record patches)))
    (cond (replacement
           (and *log-io*
                (format *log-io* "Patching. Old: \"~a\" New: \"~a\"~%"
                        (subseq record 0 (min (length record) 10))
                        (subseq replacement 0 (min 10 (length replacement)))))
           replacement)
          (t record))))

;; Table keyed by the text before the first #\: of each line of the two
;; files below.  The value is (KIND . REST) where REST is the line from two
;; characters past the colon, and KIND is :generic for compound-generics,
;; and :transferred or :no-formula for compound-no-formula.
(defparameter *about-kegg*
  (let ((table (make-hash-table :test 'equal)))
    (flet ((load-entries (path classify)
             (with-open-file (f path)
               (loop for line = (read-line f nil :eof)
                     until (eq line :eof)
                     for split = (position #\: line)
                     ;; Lines without a colon carry no key; skip them rather
                     ;; than signalling in (+ 2 nil).
                     when split
                       do (setf (gethash (subseq line 0 split) table)
                                (cons (funcall classify line)
                                      (subseq line (min (length line)
                                                        (+ 2 split)))))))))
      (load-entries "bug:compound-generics" (constantly :generic))
      (load-entries "bug:compound-no-formula"
                    (lambda (line)
                      (if (search "Transferred to " line)
                          :transferred
                          :no-formula))))
    table))

(defun consider-kegg-id-generic (id)
  "Return the *ABOUT-KEGG* entry for ID, or NIL.  ID is looked up as given
and then upcased, so the result agrees with the upcased lookup the writers
use when they log the classifiedBy warning."
  (and id
       (or (gethash id *about-kegg*)
           (gethash (string-upcase id) *about-kegg*))))

(defun %kegg-reference (kegg kegg-uri)
  "Return the KEGG argument to hand to a small-molecule generator: KEGG-URI
itself, or a one-element list holding it when KEGG has an *ABOUT-KEGG*
entry (the generators emit classifiedByKEGG for a list).  Logs the
substitution to *LOG-IO*."
  (let ((about (consider-kegg-id-generic kegg)))
    (cond ((and about kegg-uri)
           (format *log-io* "Warning: used classifiedBy vs definedBy for ~a: ~a~%"
                   kegg about)
           (list kegg-uri))
          (t kegg-uri))))

(defun %first-id (field what line)
  "Split FIELD on spaces and return the first piece (NIL when FIELD is
NIL).  When there is more than one piece, print a reminder naming WHAT
(\"cas\" or \"kegg\") and the offending LINE."
  (when field
    (let ((ids (map 'list #"toString" (#"split" field " "))))
      (when (> (length ids) 1)
        (format t "You need to patch multiple ~a in '~a'~%" what line))
      (car ids))))

(defun to-absowl (spreadsheet noisy just-assertions writer &optional generator)
  "Convert SPREADSHEET (resolved against *WORKING-DIRECTORY*) into a sibling
file of type \"absowl\" by calling WRITER with the spreadsheet path, the
patches table, the output stream and GENERATOR.  Unless JUST-ASSERTIONS,
the output is wrapped in the ontology header, *MERGEONT* and the footer.
When NOISY is false, *LOG-IO* is bound to a fresh \".log\" file for the
duration of the call; otherwise the caller's *LOG-IO* is used."
  (let* ((spreadsheet-path (merge-pathnames spreadsheet (truename *working-directory*)))
         (absowl-path (merge-pathnames (make-pathname :type "absowl") spreadsheet-path))
         (patches-path (merge-pathnames (make-pathname :type "patches") spreadsheet-path))
         (log-path (merge-pathnames (make-pathname :type "log") spreadsheet-path))
         (patches (read-patches patches-path))
         ;; Only a stream opened here is ours to close.  The original closed
         ;; the caller's stream when NOISY and leaked the log file otherwise.
         (log-file (unless noisy
                     (open log-path :direction :output :if-exists :supersede)))
         (*log-io* (or log-file *log-io*)))
    (unwind-protect
         (with-open-file (stream absowl-path :direction :output :if-exists :supersede)
           (unless just-assertions
             (write-ontology-header stream)
             (write-string *mergeont* stream))
           (funcall writer spreadsheet-path patches stream generator)
           (unless just-assertions
             (write-ontology-footer stream)))
      (when log-file
        (close log-file)))))

(defun ijr904-to-absowl (&key (spreadsheet *ijr904-compound-spreadsheet*) noisy just-assertions generator)
  "Write the iJR904 spreadsheet as abstract-syntax OWL via TO-ABSOWL."
  (to-absowl spreadsheet noisy just-assertions 'write-ijr904-absowl generator))

(defun ijr904-to-absowl-fact++ (&key (spreadsheet *ijr904-compound-spreadsheet*) noisy just-assertions generator)
  "Write the iJR904 spreadsheet in the FaCT++ lisp form via TO-ABSOWL."
  (to-absowl spreadsheet noisy just-assertions 'write-ijr904-absowl-fact++ generator))

(defun ecocyc-to-absowl (&key (spreadsheet *ecocyc-compound-spreadsheet*) noisy just-assertions generator)
  "Write the EcoCyc spreadsheet as abstract-syntax OWL via TO-ABSOWL."
  (to-absowl spreadsheet noisy just-assertions 'write-ecocyc-absowl generator))

(defun ecocyc-to-absowl-fact++ (&key (spreadsheet *ecocyc-compound-spreadsheet*) noisy just-assertions generator)
  "Write the EcoCyc spreadsheet in the FaCT++ lisp form via TO-ABSOWL."
  (to-absowl spreadsheet noisy just-assertions 'write-ecocyc-absowl-fact++ generator))

(defun write-ijr904-absowl (spreadsheet-path patches stream &optional generator)
  "Write one small-molecule individual to STREAM per (patched) row of the
tab-separated iJR904 spreadsheet, preceded by its CAS, KEGG and formula
individuals, and finish with an all-different declaration.  Rows patched
to \":delete\" are skipped.  GENERATOR defaults to WRITE-SMALLMOLECULE."
  (with-open-file (f spreadsheet-path)
    (read-line f nil :eof)              ; skip headers
    (loop with pattern = (#"compile" '|java.util.regex.Pattern| ".*Neutral MF = (\\S+).*")
          for line = (maybe-patch (read-line f nil :eof) patches)
          until (eq line :eof)
          ;; Columns: SimPheny abbreviation new name formula charge
          ;; casNumber KEGG notes.  NIL skips the columns that are not used.
          for (nil abbreviation nil name formula charge casnumber kegg notes)
            = (map 'list #"toString" (#"split" line "\\t"))
          for small-molecule-uri = (domain-uri "gcrg.ucsd.edu" "iJR904" abbreviation)
          unless (equal line ":delete")
            collect small-molecule-uri into all-small-molecules
            and do
              ;; A short row leaves NAME as NIL; do not turn it into "NIL".
              (setq name (and name (string-trim '(#\") name)))
              (let ((cas (and (valid-cas casnumber) (write-cas stream casnumber)))
                    (kegg-uri (and (valid-kegg kegg) (write-kegg stream kegg)))
                    (formcharge (and (valid-formula formula)
                                     (valid-charge charge)
                                     (write-formcharge stream formula charge)))
                    ;; A second, charge-0 formula when the notes column
                    ;; carries "Neutral MF = <formula>".
                    (neutral-formula
                      (let ((m (and notes (#"matcher" pattern notes))))
                        (and notes
                             (#"matches" m)
                             (write-formcharge stream (#"group" m 1) "0")))))
                (funcall (or generator 'write-smallmolecule)
                         stream small-molecule-uri (list name)
                         (%kegg-reference kegg kegg-uri)
                         cas nil formcharge neutral-formula))
          finally (write-alldifferent stream all-small-molecules))))

(defun write-ijr904-absowl-fact++ (spreadsheet-path patches stream generator)
  "FaCT++ counterpart of WRITE-IJR904-ABSOWL: writes defindividual forms for
the CAS/KEGG ids, one concept per row through GENERATOR (default
WRITE-SMALLMOLECULE-FACT++LISP), then a disjoint form over all molecules
and a different form over all distinct ids.  No formula individuals are
written."
  (with-open-file (f spreadsheet-path)
    (read-line f nil :eof)              ; skip headers
    (loop with all-ids = '()
          for line = (maybe-patch (read-line f nil :eof) patches)
          until (eq line :eof)
          for (nil abbreviation nil name nil nil casnumber kegg nil)
            = (map 'list #"toString" (#"split" line "\\t"))
          for small-molecule-uri = (domain-uri "gcrg.ucsd.edu" "iJR904" abbreviation)
          unless (equal line ":delete")
            collect small-molecule-uri into all-small-molecules
            and do
              (setq name (and name (string-trim '(#\") name)))
              (let ((cas (and (valid-cas casnumber) (write-cas-fact++ stream casnumber)))
                    (kegg-uri (and (valid-kegg kegg) (write-kegg-fact++ stream kegg))))
                ;; Only real ids belong in the final (different ...) form.
                (when kegg-uri (push kegg-uri all-ids))
                (when cas (push cas all-ids))
                (funcall (or generator 'write-smallmolecule-fact++lisp)
                         stream small-molecule-uri (list name)
                         (%kegg-reference kegg kegg-uri)
                         cas nil nil nil))
          finally (write-alldisjoint-fact++ stream all-small-molecules)
                  (write-alldifferent-fact++ stream (remove-duplicates all-ids :test 'equal)))))

(defun write-ecocyc-absowl (spreadsheet-path patches stream &optional generator)
  "Write one small-molecule individual to STREAM per (patched) row of the
tab-separated EcoCyc spreadsheet (columns: id, name, formula, smiles, kegg,
cas), preceded by its CAS, KEGG and charge-0 formula individuals, and
finish with an all-different declaration.  Rows patched to \":delete\" are
skipped.  GENERATOR defaults to WRITE-SMALLMOLECULE."
  (with-open-file (f spreadsheet-path)
    (read-line f nil :eof)              ; skip headers
    (loop for line = (maybe-patch (read-line f nil :eof) patches)
          until (eq line :eof)
          for (ecocyc-id name formula nil kegg casnumber) ; smiles is ignored
            = (map 'list #"toString" (#"split" line "\\t"))
          for small-molecule-uri = (domain-uri "ecocyc.org" "compound" ecocyc-id)
          ;; LINE is a string, so the marker must be the string ":delete"
          ;; (as in the iJR904 writers), not the keyword.
          unless (equal line ":delete")
            collect small-molecule-uri into all-small-molecules
            and do
              (setq casnumber (%first-id casnumber "cas" line))
              (setq kegg (%first-id kegg "kegg" line))
              (let ((cas (and (valid-cas casnumber) (write-cas stream casnumber)))
                    (kegg-uri (and (valid-kegg kegg) (write-kegg stream kegg)))
                    (formcharge (and (valid-formula formula)
                                     (write-formcharge stream formula 0))))
                (funcall (or generator 'write-smallmolecule)
                         stream small-molecule-uri (list name)
                         (%kegg-reference kegg kegg-uri)
                         cas nil formcharge))
          finally (write-alldifferent stream all-small-molecules))))

(defun write-ecocyc-absowl-fact++ (spreadsheet-path patches stream generator)
  "FaCT++ counterpart of WRITE-ECOCYC-ABSOWL: writes defindividual forms for
the CAS/KEGG ids, one concept per row through GENERATOR, then a disjoint
form over all molecules and a different form over all distinct ids."
  (with-open-file (f spreadsheet-path)
    (read-line f nil :eof)              ; skip headers
    (loop with all-ids = '()
          for line = (maybe-patch (read-line f nil :eof) patches)
          until (eq line :eof)
          for (ecocyc-id name nil nil kegg casnumber)
            = (map 'list #"toString" (#"split" line "\\t"))
          for small-molecule-uri = (domain-uri "ecocyc.org" "compound" ecocyc-id)
          unless (equal line ":delete")
            collect small-molecule-uri into all-small-molecules
            and do
              (setq casnumber (%first-id casnumber "cas" line))
              (setq kegg (%first-id kegg "kegg" line))
              (let ((cas (and (valid-cas casnumber) (write-cas-fact++ stream casnumber)))
                    (kegg-uri (and (valid-kegg kegg) (write-kegg-fact++ stream kegg))))
                ;; Collect the individual names that were actually declared
                ;; (the URIs), as the iJR904 FaCT++ writer does; the raw
                ;; KEGG id names no individual.
                (when cas (push cas all-ids))
                (when kegg-uri (push kegg-uri all-ids))
                ;; NOTE(review): the default used to be WRITE-SMALLMOLECULE,
                ;; which emits OWL abstract syntax into the FaCT++ file;
                ;; aligned with WRITE-IJR904-ABSOWL-FACT++ -- confirm.
                (funcall (or generator 'write-smallmolecule-fact++lisp)
                         stream small-molecule-uri (list name)
                         (%kegg-reference kegg kegg-uri)
                         cas nil nil))
          finally (write-alldisjoint-fact++ stream all-small-molecules)
                  (write-alldifferent-fact++ stream (remove-duplicates all-ids :test 'equal)))))

(defun owl-load-ijr904 (&key (spreadsheet *ijr904-compound-spreadsheet*))
  "Load the \".absowl\" file generated for SPREADSHEET into *DEFAULT-KB* and
check ABox consistency, timing both steps."
  (let* ((spreadsheet-path (merge-pathnames spreadsheet (truename *working-directory*)))
         (absowl-path (merge-pathnames (make-pathname :type "absowl") spreadsheet-path)))
    (time (setq *default-kb* (load-kb-abstract (namestring (truename absowl-path)))))
    (time (check-abox-consistency))))

(defun owl-load-ecocyc (&key (spreadsheet *ecocyc-compound-spreadsheet*))
  "Load the \".absowl\" file generated for SPREADSHEET into *DEFAULT-KB* and
check ABox consistency."
  (let* ((spreadsheet-path (merge-pathnames spreadsheet (truename *working-directory*)))
         (absowl-path (merge-pathnames (make-pathname :type "absowl") spreadsheet-path)))
    (setq *default-kb* (load-kb-abstract (namestring (truename absowl-path))))
    (check-abox-consistency)))

(defun %retyped-path (type f dir)
  "Return F with its pathname type replaced by TYPE, merged against the
translation of logical pathname DIR when DIR is non-NIL."
  (let ((retyped (merge-pathnames (make-pathname :type type) f)))
    (if dir
        (merge-pathnames retyped (translate-logical-pathname dir))
        retyped)))

(defun absowl-path (f &optional dir)
  "F with type \"absowl\", optionally placed in DIR."
  (%retyped-path "absowl" f dir))

(defun spreadsheet-path (f &optional dir)
  "F with type \"txt\", optionally placed in DIR."
  (%retyped-path "txt" f dir))

(defun patches-path (f &optional dir)
  "F with type \"patches\", optionally placed in DIR."
  (%retyped-path "patches" f dir))

(defun owl-path (f &optional dir)
  "F with type \"owl\", optionally placed in DIR.  (A misplaced paren used
to make DIR have no effect here.)"
  (%retyped-path "owl" f dir))

(defun generate-combined-absowl (&key (ijr *ijr904-compound-spreadsheet*)
                                      (ecocyc *ecocyc-compound-spreadsheet*)
                                      (result "combined")
                                      (maps '("sames-by-name")))
  "Write RESULT.absowl in *WORKING-DIRECTORY* containing the header,
*MERGEONT*, the iJR904 and EcoCyc assertions (each with its own patches),
the verbatim contents of every existing MAPS .absowl file, and the footer."
  (with-open-file (out (absowl-path result *working-directory*)
                       :direction :output :if-exists :supersede)
    (write-ontology-header out)
    (write-string *mergeont* out)
    (write-ijr904-absowl
     (merge-pathnames ijr (translate-logical-pathname *working-directory*))
     (read-patches (patches-path ijr *working-directory*))
     out)
    (write-ecocyc-absowl
     (merge-pathnames ecocyc (translate-logical-pathname *working-directory*))
     (read-patches (patches-path ecocyc *working-directory*))
     out)
    (loop for map in maps
          when (probe-file (absowl-path map *working-directory*))
            do (with-open-file (mapstream (absowl-path map *working-directory*))
                 (loop for line = (read-line mapstream nil :eof)
                       until (eq line :eof)
                       do (write-string line out)
                          (terpri out))))
    (write-ontology-footer out)))

(defun owl-load-combined (&key (spreadsheet "combined") (recreate nil))
  "Load the combined knowledge base, regenerating SPREADSHEET.absowl first
when RECREATE is true, the file is missing, or either source spreadsheet or
its patches file is newer than it.  Returns three values: the kb, whether
it is consistent, and the reason reported by CHECK-ABOX-CONSISTENCY."
  (flet ((file-newer-than (file1 file2)
           ;; Patches files are optional (READ-PATCHES tolerates their
           ;; absence); a missing file is never newer.
           (and (probe-file file1)
                (> (file-write-date file1) (file-write-date file2)))))
    (let* ((combined (merge-pathnames spreadsheet (truename *working-directory*)))
           (ijr904 (merge-pathnames *ijr904-compound-spreadsheet* (truename *working-directory*)))
           (ecocyc (merge-pathnames *ecocyc-compound-spreadsheet* (truename *working-directory*)))
           (absowl (absowl-path combined)))
      (when (or recreate
                (not (probe-file absowl))
                (file-newer-than (spreadsheet-path ijr904) absowl)
                (file-newer-than (patches-path ijr904) absowl)
                (file-newer-than (spreadsheet-path ecocyc) absowl)
                (file-newer-than (patches-path ecocyc) absowl))
        ;; Regenerate, then fall through to the ordinary load.  The original
        ;; recursed here, which ran the consistency check twice.
        (generate-combined-absowl :result spreadsheet))
      (let ((kb (load-kb-abstract absowl)))
        (multiple-value-bind (consistent? reason) (check-abox-consistency kb)
          (values kb consistent? reason))))))

(defun write-ontology-header (stream)
  "Write the namespace declarations and the opening of the Ontology form."
  ;; NOTE(review): every Namespace(...) below has an empty right-hand side;
  ;; the URIs may have been lost from this copy of the file -- confirm.
  (write-string "Namespace(cas=) Namespace(kegg=) Namespace(form=) Namespace(a=) Namespace(paulsson=) Namespace(ecocyc=) Namespace(xsd=) Namespace(internal=) Ontology ( " stream))

(defun write-ontology-footer (stream)
  "Close the Ontology form opened by WRITE-ONTOLOGY-HEADER."
  (format stream ")~%"))

(defun valid-cas (string)
  "True unless STRING is NIL, empty, or \"none\" (case-insensitive)."
  (and string (not (equalp string "none")) (not (equalp string ""))))

(defun valid-smiles (string)
  "True unless STRING is NIL, empty, or \"none\" (case-insensitive)."
  (and string (not (equalp string "none")) (not (equalp string ""))))

(defun valid-kegg (string)
  "True unless STRING is NIL, empty, or \"none\" (case-insensitive)."
  (and string (not (equalp string "none")) (not (equalp string ""))))

(defun valid-formula (string)
  "True unless STRING is NIL, empty, or \"none\" (case-insensitive).  The
empty-string check matches the other VALID-* predicates; without it every
row with a blank formula cell produced a formula individual for \"\"."
  (and string (not (equalp string "none")) (not (equalp string ""))))

(defun valid-charge (string)
  "Return STRING parsed as an integer, or NIL when it does not parse."
  (ignore-errors (parse-integer string)))

;; work around bug in OWL api that disallows ")" in a uri. Sheesh.
;; add that the & are not escaped when written to rdf
;; List of (compiled-regex replacement) pairs, one per character that
;; CLEAN-URI percent-escapes.
(defparameter *uri-workaround-character-fixes*
  (load-time-value
   (loop for fixme in '(#\& #\Space #\( #\))
         collect (list (#"compile" '|java.util.regex.Pattern| (format nil "[~c]" fixme))
                       ;; Zero-padded so a code below #x10 would still give a
                       ;; two-digit escape; unchanged for the characters above.
                       (format nil "%~2,'0x" (char-code fixme))))))

(defun clean-uri (site path &optional (protocol "http"))
  "Build a java.net.URI from PROTOCOL, SITE and PATH, apply every
replacement in *URI-WORKAROUND-CHARACTER-FIXES* to its string form, and
return the result lowercased."
  (let* ((null (load-time-value (make-immediate-object nil :ref)))
         (uri (#0"toString" (new 'java.net.uri protocol site path null null))))
    ;; Threading the string through a plain variable also covers an empty
    ;; fix list, where the original called toLowerCase on NIL.
    (loop for (pattern replacement) in *uri-workaround-character-fixes*
          do (setq uri (#0"replaceAll" (#0"matcher" pattern uri) replacement)))
    (#"toLowerCase" uri)))

(defun id-uri (type &rest components)
  "URI on www.biopax.org whose path is /TYPE/ followed by COMPONENTS joined
with #\\*."
  (clean-uri "www.biopax.org" (format nil "/~a/~{~a~^*~}" (string type) components)))

(defun domain-uri (domain &rest components)
  "URI on DOMAIN whose path is COMPONENTS joined with #\\/."
  (clean-uri domain (format nil "/~{~a~^/~}" components)))

(defun encode-name (name)
  "Return a copy of NAME with every character listed in *NASTY* replaced
by #\\_."
  (substitute-if #\_ (lambda (e) (member e *nasty* :test 'equal)) name))

(defun write-cas (stream casid)
  "Write a casEntry individual for CASID to STREAM; return its URI."
  (let ((name (id-uri :xref "cas" casid)))
    (format stream "Individual(<~a> type(a:casEntry) value(a:hasID \"~a\"^^xsd:string))~%" name casid)
    name))

(defun write-cas-fact++ (stream casid)
  "Write a defindividual form for CASID to STREAM; return its URI."
  (let ((name (id-uri :xref "cas" casid)))
    (format stream "(defindividual ~a)~%" name)
    name))

(defun write-kegg (stream keggid)
  "Write a keggEntry individual for KEGGID to STREAM; return its URI."
  (let ((name (id-uri :xref "kegg" keggid)))
    (format stream "Individual(<~a> type(a:keggEntry) value(a:hasID \"~a\"^^xsd:string))~%" name keggid)
    name))

(defun write-kegg-fact++ (stream keggid)
  "Write a defindividual form for KEGGID to STREAM; return its URI."
  (let ((name (id-uri :xref "kegg" keggid)))
    (format stream "(defindividual ~a)~%" name)
    name))

(defun write-smiles (stream smiles)
  "Write a SMILESDescription individual for SMILES to STREAM; return its URI."
  (let ((name (id-uri :xref "smiles" smiles)))
    (format stream "Individual(<~a> type(a:SMILESDescription) value(a:hasDescription \"~a\"^^xsd:string))~%" name smiles)
    name))

(defun write-formcharge (stream formula charge)
  "Write a chemicalFormula individual for FORMULA (written downcased) with
CHARGE to STREAM; return its URI."
  (let ((name (id-uri :xref "formula" formula charge)))
    (format stream "Individual(<~a> type(a:chemicalFormula) value(a:hasFormula \"~a\"^^xsd:string) value(a:hasCharge \"~a\"^^xsd:int))~%" name (string-downcase formula) charge)
    name))

;; Shared counter: numbers both the generated property names and the
;; per-individual key values in WRITE-ALLDIFFERENT.
(defparameter *alldifferent-counter* 0)

;; When true, WRITE-ALLDIFFERENT additionally emits a DifferentIndividuals
;; axiom.
(defvar *use-differentindividuals* nil)

(defun write-alldifferent (stream list)
  "Make the individuals in LIST pairwise different by declaring a fresh
functional int-valued datatype property and giving each individual its own
value for it.  A DifferentIndividuals axiom is written first when
*USE-DIFFERENTINDIVIDUALS* is true."
  (when *use-differentindividuals*
    (format stream "DifferentIndividuals(~{<~a>~^ ~})~%" list))
  (let ((prop (format nil "internal:alldifferent~a" (incf *alldifferent-counter*))))
    (format stream "DatatypeProperty(~a Functional range(xsd:int))~%" prop)
    (loop for ind in list
          for key = (incf *alldifferent-counter*)
          do (format stream "Individual(<~a> value(~a \"~a\"^^xsd:int))~%" ind prop key))))

(defun write-alldisjoint-fact++ (stream list)
  "Print a (disjoint ...) form over LIST to STREAM, symbols downcased."
  (let ((*print-case* :downcase))
    (princ `(disjoint ,@list) stream)))

(defun write-alldifferent-fact++ (stream list)
  "Print a (different ...) form over LIST to STREAM, symbols downcased."
  (let ((*print-case* :downcase))
    (princ `(different ,@list) stream)))

(defun write-smallmolecule (stream id names kegg cas smiles &rest formulae)
  "Write the smallMolecule individual ID to STREAM.  KEGG is either a URI
(written as definedByKEGG) or a list whose car is a URI (written as
classifiedByKEGG).  CAS, SMILES and each of FORMULAE are URIs or NIL; NAMES
is a list of strings, each passed through ENCODE-NAME."
  (format stream "Individual(<~a> type(a:smallMolecule)~%" id)
  (when (and kegg (not (consp kegg)))
    (format stream "value(a:definedByKEGG <~a>)~%" kegg))
  (when (and kegg (consp kegg))
    (format stream "value(a:classifiedByKEGG <~a>)~%" (car kegg)))
  (when cas
    (format stream "value(a:definedByCAS <~a>)~%" cas))
  (when smiles
    (format stream "value(a:hasSMILES <~a>)~%" smiles))
  (dolist (formula formulae)
    (when formula
      (format stream "value(a:hasChemicalFormula <~a>)~%" formula)))
  (dolist (name names)
    (when name
      (format stream "value(a:hasName \"~a\"^^xsd:string)~%" (encode-name name))))
  (format stream ")~%"))

;;
(defun write-smallmolecule-fact++lisp (stream id names kegg cas smiles &rest formulae)
  "Write the FaCT++ lisp definitions for molecule ID to STREAM, one form per
line.  KEGG follows the WRITE-SMALLMOLECULE convention: a URI defines the
concept, a one-element list only classifies it.  NAMES, SMILES and FORMULAE
are accepted for signature compatibility and ignored.

The three clauses are now independent.  In the original the generic-KEGG
and CAS clauses were nested inside the specific-KEGG WHEN, so the generic
clause could never run and CAS was dropped whenever KEGG was absent or
generic."
  (declare (ignore names smiles formulae))
  (let ((*print-case* :downcase))
    (cond ((consp kegg)
           (when (car kegg)
             ;; NOTE(review): the dead original spelled this role
             ;; classifedByKEGG; renamed to follow defined-by-kegg --
             ;; confirm against the FaCT++ role names.
             (princ `(defconcept ,id (some classified-by-kegg (one-of ,(car kegg)))) stream)
             (terpri stream)))
          (kegg
           (princ `(equal_c ,id (some defined-by-kegg (one-of ,kegg))) stream)
           (terpri stream)))
    (when cas
      (princ `(equal_c ,id (some defined-by-cas (one-of ,cas))) stream)
      (terpri stream))))

;; Schema axioms written after the header of every generated ontology.
(defparameter *mergeont* " Class(a:databaseEntry partial) DatatypeProperty(a:hasID Functional domain(a:databaseEntry) range(xsd:string) ) Class(a:casEntry partial) Class(a:keggEntry partial) SubClassOf(a:casEntry a:databaseEntry) SubClassOf(a:keggEntry a:databaseEntry) Class(a:chemicalStructureDescription partial) Class(a:SMILESDescription partial) SubClassOf(a:chemicalStructureDescription a:SMILESDescription) DatatypeProperty(a:hasDescription Functional domain(a:SMILESDescription) range(xsd:string) ) Class(a:chemicalFormula partial) SubClassOf(a:chemicalStructureDescription a:chemicalFormula) DatatypeProperty(a:hasFormula Functional domain(a:chemicalFormula) range(xsd:string) ) DatatypeProperty(a:hasCharge Functional domain(a:chemicalFormula) range(xsd:int) ) Class(a:smallMolecule partial) ObjectProperty(a:hasChemicalFormula domain(a:smallMolecule) range(a:chemicalFormula) ) ObjectProperty(a:hasSMILES InverseFunctional domain(a:smallMolecule) range(a:SMILESDescription)) ObjectProperty(a:definedByCAS InverseFunctional domain(a:smallMolecule) range(a:casEntry)) ObjectProperty(a:definedByCAS Functional) ObjectProperty(a:classifiedByKEGG Functional domain(a:smallMolecule) range(a:keggEntry)) ObjectProperty(a:definedByKEGG InverseFunctional domain(a:smallMolecule) range(a:keggEntry)) ObjectProperty(a:definedByKEGG super(a:classifiedByKEGG) Functional) DatatypeProperty(a:hasName domain(a:smallMolecule) range(xsd:string) ) ")

; Note
; adding Class(a:smallMoleculeNoFormula complete intersection(a:smallMolecule) restriction(a:hasChemicalFormula maxCardinality(0)))
; probably doubles memory footprint, quadruples check time.
; Doesn't return anything.
; alldifferent times 63 seconds 2 x. Without alldifferent 20, 30 seconds.
(defun read-jeremy-compound-map (&optional (path "/Users/ruttenbe/Desktop/Jeremy - E.Coli Integration/palsson-biocyc-compounds.txt"))
  "Read the tab-separated file at PATH and return a duplicate-free list of
(\"pallson:<field 1>\" \"ecocyc:<field 10>\") pairs, one for every row in
which both fields (0-based) are present."
  (let ((pairs '()))
    (with-open-file (in path)
      (do ((row (read-line in nil :eof) (read-line in nil :eof)))
          ((eq row :eof))
        (let* ((cells (map 'list #"toString" (#"split" row "\\t")))
               (palsson-cell (nth 1 cells))
               (ecocyc-cell (nth 10 cells)))
          (when (and palsson-cell ecocyc-cell)
            (push (list (format nil "pallson:~a" palsson-cell)
                        (format nil "ecocyc:~a" ecocyc-cell))
                  pairs)))))
    ;; Restore file order before de-duplicating so the surviving entries
    ;; are the same ones as with a collecting loop.
    (remove-duplicates (nreverse pairs) :test 'equal)))

(defun query-test (&optional (use-reasoner t))
  "Run a fixed SPARQL query selecting ?mol and return the flattened result.
USE-REASONER is passed through to SPARQL."
  (let ((query (concatenate 'string
                            "PREFIX rdf: "
                            "SELECT ?mol "
                            "WHERE { ?mol rdf:type }")))
    (sparql query :use-reasoner use-reasoner :flatten t)))

;; we've got to the first stage, based on CAS and Kegg. 263 match. Now what?
;; ? Are there any generics that match?
;; generic with a chemical formula 179
'(pprint(sparql (concatenate 'string "PREFIX rdf: " "PREFIX bug: " "SELECT ?mol ?formula ?kegg2 " "WHERE { ?mol bug:hasChemicalFormula ?formula. " " ?mol bug:classifiedByKEGG ?kegg2." " OPTIONAL { ?mol bug:definedByKEGG ?kegg }." " FILTER (!(bound(?kegg)))} LIMIT 10") :use-reasoner t ))

;; pure generics: none.
'(length(sparql (concatenate 'string "PREFIX rdf: " "PREFIX bug: " "SELECT ?mol ?kegg2 " "WHERE { OPTIONAL {?mol bug:hasChemicalFormula ?formula}. " " ?mol bug:classifiedByKEGG ?kegg2." " OPTIONAL { ?mol bug:definedByKEGG ?kegg }." " FILTER (!(bound(?kegg)) && !(bound(?formula)))} ") :use-reasoner t ))

;; Filled by READ-MANUAL-MAPPING: ecocyc id -> abbreviation, compared
;; with EQUALP.
(defparameter *ecocyc-to-passlon-manual* (make-hash-table :test 'equalp))

;7098 leutrna Yes L-Leucyl-tRNA(Leu) C6H12NOR 1 C02047 KEGG -C02047 SMP adjusted for Genomatica's change to aa - NCD
(defun read-manual-mapping (file)
  "Read the tab-separated FILE (resolved against *WORKING-DIRECTORY*), skip
its header line, and record in *ECOCYC-TO-PASSLON-MANUAL* the abbreviation
(column 0) under the ecocyc id (column 7) of every row.  Returns NIL."
  (with-open-file (in (merge-pathnames file (truename *working-directory*))
                      :direction :input)
    (read-line in)                      ; header row
    (do ((row (read-line in nil :eof) (read-line in nil :eof)))
        ((eq row :eof))
      ;; Columns: abbreviation officialName formula charge casNumber
      ;; KEGG-cmpd-ID notes ecocyc-id analysis metacyc-id
      (let ((cells (map 'list #"toString" (#"split" row "\\t"))))
        (setf (gethash (nth 7 cells) *ecocyc-to-passlon-manual*)
              (nth 0 cells))))))

;; (defun write-manual-mapping ()
;; (let ((names (sparql '(:select (?mol) (:distinct t) (?mol "rdf:type" "bug:smallMolecule")))))
;; (let ((namehash (make-hash-table :test 'equal)))
;; (loop for names in names do
;; (setf (gethash (pathname-name name) namehash) name)))
;; (maphash
;; (lambda(ecocyc pallson)
;; (id-uri
;; (print k))) *ecocyc-to-passlon-manual*)

(defun test-query ()
  "True when the s-expression form and the string form of the same query
return the same number of rows."
  (let* ((sexp-rows
           (sparql '(:select (?mol ?formula ?kegg2) ()
                     (:optional (?mol "bug:hasChemicalFormula" ?formula))
                     (?mol "bug:classifiedByKEGG" ?kegg2)
                     (:optional (?mol "bug:definedByKEGG" ?kegg))
                     (:filter (not (bound ?kegg))))))
         (string-rows
           (sparql (concatenate 'string
                                "PREFIX rdf: "
                                "PREFIX bug: "
                                "SELECT ?mol ?formula ?kegg2 "
                                "WHERE { OPTIONAL {?mol bug:hasChemicalFormula ?formula}. "
                                " ?mol bug:classifiedByKEGG ?kegg2."
                                " OPTIONAL { ?mol bug:definedByKEGG ?kegg }."
                                " FILTER (!(bound(?kegg)))} "))))
    (= (length sexp-rows) (length string-rows))))