Source code for tooluniverse.classyfire_tool
"""
ClassyFire tool for ToolUniverse — automated chemical taxonomy by InChIKey.
ClassyFire (Wishart Lab) assigns a structure to a hierarchical chemical ontology
(ChemOnt): kingdom -> superclass -> class -> subclass -> direct parent, plus the
molecular framework, substituents, and a textual description. This wraps the
precomputed InChIKey lookup, which is an instant cache hit for known structures.
API: http://classyfire.wishartlab.com/entities/{inchikey}.json (public, no auth)
"""
from typing import Any, Dict, Optional
import requests
from .base_tool import BaseTool
from .tool_registry import register_tool
CLASSYFIRE_BASE = "http://classyfire.wishartlab.com/entities"
def _name(node: Any) -> Optional[str]:
"""ChemOnt nodes are {'name', 'chemont_id', ...}; pull the name."""
return node.get("name") if isinstance(node, dict) else node
[docs]
@register_tool("ClassyFireTool")
class ClassyFireTool(BaseTool):
"""Classify a chemical structure into the ChemOnt taxonomy by InChIKey."""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
super().__init__(tool_config)
self.timeout = tool_config.get("fields", {}).get("timeout", 30)
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
inchikey = (arguments.get("inchikey") or "").strip()
if not inchikey:
return {
"status": "error",
"error": "'inchikey' is required (a full 27-char InChIKey, e.g. 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N')",
}
url = f"{CLASSYFIRE_BASE}/{inchikey}.json"
try:
resp = requests.get(
url, headers={"Accept": "application/json"}, timeout=self.timeout
)
if resp.status_code == 404:
return {
"status": "success",
"data": {"inchikey": inchikey, "classified": False},
"metadata": {
"classified": False,
"note": f"InChIKey '{inchikey}' is not in the ClassyFire cache. "
"Only previously-classified structures are available via this lookup.",
"source": "ClassyFire",
},
}
resp.raise_for_status()
rec = resp.json()
except requests.exceptions.Timeout:
return {
"status": "error",
"error": f"ClassyFire request timed out after {self.timeout}s",
}
except requests.exceptions.RequestException as e:
return {"status": "error", "error": f"ClassyFire request failed: {e}"}
except ValueError:
return {
"status": "error",
"error": "ClassyFire returned a non-JSON response",
}
# A cache miss can also come back as 200 with an empty/no-taxonomy body.
if not isinstance(rec, dict) or not rec.get("kingdom"):
return {
"status": "success",
"data": {"inchikey": inchikey, "classified": False},
"metadata": {"classified": False, "source": "ClassyFire"},
}
return {
"status": "success",
"data": {
"inchikey": rec.get("inchikey") or inchikey,
"classified": True,
"kingdom": _name(rec.get("kingdom")),
"superclass": _name(rec.get("superclass")),
"class": _name(rec.get("class")),
"subclass": _name(rec.get("subclass")),
"direct_parent": _name(rec.get("direct_parent")),
"intermediate_nodes": [
_name(n) for n in rec.get("intermediate_nodes", [])
],
"molecular_framework": rec.get("molecular_framework"),
"substituents": rec.get("substituents", []),
"description": rec.get("description"),
},
"metadata": {"classified": True, "source": "ClassyFire (ChemOnt)"},
}