Source code for tooluniverse.zooma_tool

"""
ZOOMA tool for ToolUniverse.

ZOOMA (https://www.ebi.ac.uk/spot/zooma) is an EBI/SPOT service that maps free
text (e.g. a sample attribute, phenotype description, organism name, or disease
label) to ontology terms. It returns ontology cross-references (semantic tags as
OBO/EFO IRIs) together with a confidence rating (HIGH/GOOD/MEDIUM/LOW) and
provenance, drawing on curated annotations plus OLS text tagging.

This fills the gap left by the retired OxO service and complements OLS: instead
of looking up a known term, ZOOMA annotates arbitrary free text to the most
likely ontology term(s).

Operations (dispatched by tool_config["fields"]["operation"]):
  - annotate         : map free text to ranked ontology annotations (default)
  - list_datasources : list ZOOMA curated annotation datasources

API: https://www.ebi.ac.uk/spot/zooma/v2/api/
No authentication required. Public access.
"""

import requests
from typing import Any

from .base_tool import BaseTool
from .tool_registry import register_tool

ZOOMA_BASE = "https://www.ebi.ac.uk/spot/zooma/v2/api"

# Confidence ranking used for the optional min_confidence filter.
_CONFIDENCE_RANK = {"LOW": 0, "MEDIUM": 1, "GOOD": 2, "HIGH": 3}


[docs] @register_tool("ZoomaTool") class ZoomaTool(BaseTool): """ Tool for ZOOMA - EBI free-text-to-ontology annotation service. Maps a free-text property value (optionally with a property type and an ontology source filter) to ontology terms, returning their IRIs, CURIEs, confidence, and provenance. Also lists ZOOMA datasources. No authentication required. run() never raises. """
[docs] def __init__(self, tool_config: dict): super().__init__(tool_config) self.timeout = 30 fields = tool_config.get("fields", {}) or {} self.operation = fields.get("operation", "annotate")
[docs] def run(self, arguments: dict) -> dict: """Execute the requested ZOOMA call. Never raises.""" try: if self.operation == "list_datasources": return self._list_datasources() return self._annotate(arguments or {}) except requests.exceptions.Timeout: return { "status": "error", "error": f"ZOOMA request timed out after {self.timeout}s", } except requests.exceptions.ConnectionError: return {"status": "error", "error": "Failed to connect to ZOOMA service."} except requests.exceptions.HTTPError as e: code = getattr(e.response, "status_code", "unknown") return {"status": "error", "error": f"ZOOMA HTTP error: {code}"} except Exception as e: # noqa: BLE001 - defensive: run() must never raise return {"status": "error", "error": f"Unexpected error: {str(e)}"}
# ----------------------------------------------------------- annotate
[docs] def _annotate(self, arguments: dict) -> dict: property_value = (arguments.get("property_value") or "").strip() if not property_value: return { "status": "error", "error": "Parameter 'property_value' is required (free text to annotate).", } params = {"propertyValue": property_value} property_type = arguments.get("property_type") if property_type: params["propertyType"] = str(property_type).strip() # Optional ontology / source filter, e.g. "efo" or "efo,uberon". ontologies = arguments.get("ontologies") if ontologies: if isinstance(ontologies, (list, tuple)): onto_list = [str(o).strip() for o in ontologies if str(o).strip()] else: onto_list = [o.strip() for o in str(ontologies).split(",") if o.strip()] if onto_list: params["filter"] = ( "required:[none],ontologies:[" + ",".join(onto_list) + "]" ) resp = requests.get( f"{ZOOMA_BASE}/services/annotate", params=params, headers={"Accept": "application/json"}, timeout=self.timeout, ) resp.raise_for_status() raw = resp.json() if not isinstance(raw, list): raw = [] min_conf = arguments.get("min_confidence") min_rank = _CONFIDENCE_RANK.get(str(min_conf).upper()) if min_conf else None try: max_results = int(arguments.get("max_results", 10)) except (TypeError, ValueError): max_results = 10 if max_results < 1: max_results = 1 annotations = [] for item in raw: conf = item.get("confidence") if min_rank is not None: rank = _CONFIDENCE_RANK.get(str(conf).upper()) if rank is None or rank < min_rank: continue annotations.append(self._format(item)) if len(annotations) >= max_results: break return {"status": "success", "data": annotations}
[docs] @staticmethod def _format(item: dict) -> dict: prop = item.get("annotatedProperty") or {} prov = item.get("provenance") or {} source = prov.get("source") or {} olslinks = (item.get("_links") or {}).get("olslinks") or [] ols_urls = [ link.get("href") for link in olslinks if isinstance(link, dict) and link.get("href") ] tags = item.get("semanticTags") or [] curies = [c for c in (_iri_to_curie(t) for t in tags) if c] return { "property_value": prop.get("propertyValue"), "property_type": prop.get("propertyType"), "semantic_tags": tags, "curies": curies, "confidence": item.get("confidence"), "source": source.get("name"), "source_type": source.get("type"), "evidence": prov.get("evidence"), "ols_links": ols_urls, }
# ------------------------------------------------------- datasources
[docs] def _list_datasources(self) -> dict: resp = requests.get( f"{ZOOMA_BASE}/sources", headers={"Accept": "application/json"}, timeout=self.timeout, ) resp.raise_for_status() raw = resp.json() if not isinstance(raw, list): raw = [] data = [ { "name": s.get("name"), "type": s.get("type"), "uri": s.get("uri"), } for s in raw if isinstance(s, dict) ] return {"status": "success", "data": data}
def _iri_to_curie(iri: Any) -> str | None: """Derive a CURIE (e.g. MONDO:0004979) from an ontology IRI/PURL.""" if not iri or not isinstance(iri, str): return None short = iri.rstrip("/").split("/")[-1].split("#")[-1] if "_" in short: prefix, _, local = short.partition("_") if prefix and local: return f"{prefix}:{local}" return short or None