# Source code for tooluniverse.unichem_tool
# unichem_tool.py
"""
UniChem REST API tool for ToolUniverse.
UniChem is EBI's unified chemical structure cross-referencing service.
It maps compound identifiers across 40+ chemical databases including
ChEMBL, DrugBank, PDBe, PubChem, KEGG, ChEBI, and HMDB. Given a
chemical structure (InChIKey) or database ID, UniChem returns all
known cross-references instantly.
API: https://www.ebi.ac.uk/unichem/api/v1/
No authentication required. Free for all use.
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL of the UniChem REST API (v1); endpoint paths such as
# "/compounds" and "/sources/" are appended to it when building requests.
UNICHEM_BASE_URL = "https://www.ebi.ac.uk/unichem/api/v1"
# [docs]
@register_tool("UniChemTool")
class UniChemTool(BaseTool):
    """
    Tool for querying the UniChem compound cross-referencing service.

    Maps chemical identifiers across 40+ databases using InChIKey,
    source compound IDs, or UCIs (UniChem Compound Identifiers).
    No authentication required.

    The tool configuration selects which endpoint an instance serves via
    ``tool_config["fields"]["endpoint_type"]``:

    * ``"search_compound"`` (default) - POST to ``/compounds``.
    * ``"list_sources"`` - GET ``/sources/``.

    Every public call returns a plain dict: either
    ``{"data": ..., "metadata": ...}`` on success or ``{"error": "..."}``
    on failure. ``run`` never raises.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Store the request timeout and the endpoint this instance serves.

        Args:
            tool_config: Tool configuration. Reads the optional ``timeout``
                key (seconds, default 30) and the optional
                ``fields.endpoint_type`` key (default ``"search_compound"``).
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``or {}`` also covers a config where "fields" is present but null.
        fields = tool_config.get("fields") or {}
        self.endpoint_type = fields.get("endpoint_type", "search_compound")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the UniChem API call.

        Args:
            arguments: Endpoint-specific arguments (see ``_search_compound``
                and ``_list_sources``).

        Returns:
            The endpoint result, or ``{"error": message}`` if the request
            timed out, could not connect, returned an HTTP error status, or
            failed in any other way.
        """
        try:
            return self._dispatch(arguments)
        except requests.exceptions.Timeout:
            return {
                "error": f"UniChem API request timed out after {self.timeout} seconds"
            }
        except requests.exceptions.ConnectionError:
            return {
                "error": "Failed to connect to UniChem API. Check network connectivity."
            }
        except requests.exceptions.HTTPError as e:
            # An HTTPError is not guaranteed to carry a response object;
            # avoid masking the real failure with an AttributeError.
            status = getattr(e.response, "status_code", "unknown")
            return {"error": f"UniChem API HTTP error: {status}"}
        except Exception as e:
            # Top-level boundary: callers expect an error dict, not a raise.
            return {"error": f"Unexpected error querying UniChem: {str(e)}"}

    def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the handler matching ``self.endpoint_type``."""
        if self.endpoint_type == "search_compound":
            return self._search_compound(arguments)
        if self.endpoint_type == "list_sources":
            return self._list_sources(arguments)
        return {"error": f"Unknown endpoint_type: {self.endpoint_type}"}

    @staticmethod
    def _envelope(data: Dict[str, Any], query: Any, endpoint: str) -> Dict[str, Any]:
        """Wrap *data* in the ``{"data", "metadata"}`` structure all endpoints return."""
        return {
            "data": data,
            "metadata": {
                "source": "UniChem",
                "query": query,
                "endpoint": endpoint,
            },
        }

    def _search_compound(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search UniChem for a compound by InChIKey, sourceID, or UCI.

        Args:
            arguments: Reads ``compound`` (required, the identifier to look
                up), ``type`` (identifier kind, default ``"inchikey"``) and
                ``sourceID`` (optional; sent only when not ``None``).

        Returns:
            ``{"error": ...}`` when ``compound`` is missing or empty.
            Otherwise a ``{"data", "metadata"}`` dict whose ``data`` holds
            ``inchi``, ``inchikey``, ``formula``, ``source_count`` and
            ``sources`` for the first compound in the response. When the
            response has no compounds, the scalar fields are ``None`` and
            ``sources`` is empty.

        Raises:
            requests.exceptions.RequestException: on transport or HTTP
                failure (translated to an error dict by ``run``).
        """
        compound = arguments.get("compound", "")
        search_type = arguments.get("type", "inchikey")
        source_id = arguments.get("sourceID")

        if not compound:
            return {
                "error": "compound parameter is required (e.g., InChIKey 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N')"
            }

        payload = {"compound": compound, "type": search_type}
        if source_id is not None:
            payload["sourceID"] = source_id

        response = requests.post(
            f"{UNICHEM_BASE_URL}/compounds",
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        raw = response.json()

        # ``or []`` treats a JSON null the same as a missing key.
        compounds = raw.get("compounds") or []
        if not compounds:
            not_found = {
                "inchi": None,
                "inchikey": None,
                "formula": None,
                "source_count": 0,
                "sources": [],
            }
            return self._envelope(not_found, compound, "compounds")

        # Only the first matching compound is reported.
        first = compounds[0]

        inchi_data = first.get("inchi", {})
        if isinstance(inchi_data, dict):
            inchi_str = inchi_data.get("inchi", None)
            formula = inchi_data.get("formula", None)
        else:
            inchi_str = None
            formula = None

        sources = [
            {
                "source_name": s.get("shortName", ""),
                "source_long_name": s.get("longName", ""),
                "compound_id": s.get("compoundId", ""),
                "url": s.get("url", None),
            }
            for s in (first.get("sources") or [])
        ]

        # Prefer the key reported by the service so that non-InChIKey
        # searches also get one. NOTE(review): assumes the compound record
        # exposes it as "standardInchiKey" - confirm against the UniChem API
        # docs. If the field is absent, fall back to echoing the query for
        # InChIKey searches (the previous behaviour).
        inchikey = first.get("standardInchiKey") or (
            compound if search_type == "inchikey" else None
        )

        result = {
            "inchi": inchi_str,
            "inchikey": inchikey,
            "formula": formula,
            "source_count": len(sources),
            "sources": sources,
        }
        return self._envelope(result, compound, "compounds")

    def _list_sources(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        List all chemical database sources in UniChem.

        Args:
            arguments: Accepted for interface symmetry with the other
                handlers; not read.

        Returns:
            A ``{"data", "metadata"}`` dict whose ``data`` holds
            ``source_count`` and ``sources``; each source entry has
            ``source_id``, ``name``, ``long_name``, ``description``,
            ``compound_count`` and ``last_updated``.

        Raises:
            requests.exceptions.RequestException: on transport or HTTP
                failure (translated to an error dict by ``run``).
        """
        response = requests.get(
            f"{UNICHEM_BASE_URL}/sources/",
            headers={"Content-Type": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        raw = response.json()

        sources = [
            {
                "source_id": s.get("sourceID", 0),
                "name": s.get("name", ""),
                # Fall back to the label when no long name is provided.
                "long_name": s.get("nameLong", s.get("nameLabel", "")),
                "description": s.get("description", None),
                "compound_count": s.get("UCICount", None),
                "last_updated": s.get("lastUpdated", None),
            }
            # ``or []`` treats a JSON null the same as a missing key.
            for s in (raw.get("sources") or [])
        ]

        result = {
            "source_count": len(sources),
            "sources": sources,
        }
        return self._envelope(result, "all_sources", "sources")