Source code for tooluniverse.bioportal_tool

# bioportal_tool.py
"""
BioPortal (NCBO) API tool for ToolUniverse.

BioPortal is the world's most comprehensive repository of biomedical
ontologies, hosting 900+ ontologies including GO, HPO, DOID, SNOMED,
MeSH, CHEBI, and many more. It provides cross-ontology search, concept
details, hierarchical browsing, text annotation, and cross-ontology
mappings.

API: https://data.bioontology.org/
Uses public demo API key (free, no registration needed).
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

BIOPORTAL_BASE_URL = "https://data.bioontology.org"
# Public demo API key available at http://bioportal.bioontology.org/
BIOPORTAL_API_KEY = "8b5b7825-538d-40e0-9e9e-5ab9274a9aeb"


[docs] @register_tool("BioPortalTool") class BioPortalTool(BaseTool): """ Tool for querying BioPortal, the largest biomedical ontology repository. BioPortal hosts 900+ ontologies covering diseases (DOID, MONDO), phenotypes (HPO), gene function (GO), chemicals (CHEBI), anatomy (UBERON), drugs (RXNORM), and more. Supports: cross-ontology search, concept detail lookup, text annotation with ontology terms, and concept hierarchy traversal. Uses public demo API key (no registration required). """
[docs] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) self.timeout = tool_config.get("timeout", 30) fields = tool_config.get("fields", {}) self.endpoint = fields.get("endpoint", "search")
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Execute the BioPortal API call.""" try: return self._query(arguments) except requests.exceptions.Timeout: return {"error": f"BioPortal API timed out after {self.timeout}s"} except requests.exceptions.ConnectionError: return { "error": "Failed to connect to BioPortal API (data.bioontology.org). The server may be blocking connections from your network or IP address." } except requests.exceptions.HTTPError as e: return {"error": f"BioPortal API HTTP error: {e.response.status_code}"} except Exception as e: return {"error": f"Unexpected error querying BioPortal: {str(e)}"}
[docs] def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Route to appropriate BioPortal endpoint.""" if self.endpoint == "search": return self._search(arguments) elif self.endpoint == "get_concept": return self._get_concept(arguments) elif self.endpoint == "annotate_text": return self._annotate_text(arguments) elif self.endpoint == "get_hierarchy": return self._get_hierarchy(arguments) else: return {"error": f"Unknown endpoint: {self.endpoint}"}
[docs] def _get_concept(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get detailed information for a specific ontology concept.""" ontology = arguments.get("ontology", "") concept_id = arguments.get("concept_id", "") if not ontology or not concept_id: return {"error": "Both ontology and concept_id are required"} # URL-encode the concept IRI (single encode only) import urllib.parse encoded_id = urllib.parse.quote(concept_id, safe="") url = f"{BIOPORTAL_BASE_URL}/ontologies/{ontology}/classes/{encoded_id}" params = { "apikey": BIOPORTAL_API_KEY, "display_links": "false", "display_context": "false", } response = requests.get(url, params=params, timeout=self.timeout) response.raise_for_status() data = response.json() return { "data": { "label": data.get("prefLabel"), "id": data.get("@id"), "synonyms": data.get("synonym", []), "definitions": data.get("definition", []), "obsolete": data.get("obsolete", False), "cui": data.get("cui", []), "semantic_type": data.get("semanticType", []), }, "metadata": { "source": "BioPortal (NCBO)", "ontology": ontology, }, }
[docs] def _annotate_text(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Annotate biomedical text with ontology terms (named entity recognition).""" text = arguments.get("text", "") if not text: return {"error": "text parameter is required"} ontologies = arguments.get("ontologies") longest_only = arguments.get("longest_only") if longest_only is None: longest_only = True url = f"{BIOPORTAL_BASE_URL}/annotator" payload = { "apikey": BIOPORTAL_API_KEY, "text": text, "longest_only": str(longest_only).lower(), "include": "prefLabel", "display_links": "false", "display_context": "false", } if ontologies: payload["ontologies"] = ontologies response = requests.post(url, data=payload, timeout=self.timeout) response.raise_for_status() data = response.json() annotations = [] for ann in data: cls = ann.get("annotatedClass", {}) for match in ann.get("annotations", []): annotations.append( { "matched_text": match.get("text"), "from": match.get("from"), "to": match.get("to"), "match_type": match.get("matchType"), "concept_label": cls.get("prefLabel"), "concept_id": cls.get("@id", "").split("/")[-1] if cls.get("@id") else None, "concept_full_id": cls.get("@id"), } ) return { "data": annotations, "metadata": { "source": "BioPortal Annotator (NCBO)", "total_annotations": len(annotations), "text_length": len(text), }, }
[docs] def _get_hierarchy(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get children or ancestors of an ontology concept.""" ontology = arguments.get("ontology", "") concept_id = arguments.get("concept_id", "") direction = arguments.get("direction", "children") if not ontology or not concept_id: return {"error": "Both ontology and concept_id are required"} import urllib.parse encoded_id = urllib.parse.quote(concept_id, safe="") if direction == "ancestors": url = f"{BIOPORTAL_BASE_URL}/ontologies/{ontology}/classes/{encoded_id}/ancestors" elif direction == "parents": url = f"{BIOPORTAL_BASE_URL}/ontologies/{ontology}/classes/{encoded_id}/parents" else: url = f"{BIOPORTAL_BASE_URL}/ontologies/{ontology}/classes/{encoded_id}/children" page_size = arguments.get("page_size") or 25 params = { "apikey": BIOPORTAL_API_KEY, "display_links": "false", "display_context": "false", "pagesize": min(page_size, 100), } response = requests.get(url, params=params, timeout=self.timeout) response.raise_for_status() data = response.json() # Handle paginated vs flat list responses concepts = [] if isinstance(data, list): items = data total = len(data) else: items = data.get("collection", []) total = data.get("totalCount", len(items)) for item in items: concepts.append( { "label": item.get("prefLabel"), "id": item.get("@id", "").split("/")[-1] if item.get("@id") else None, "full_id": item.get("@id"), "synonyms": item.get("synonym", [])[:3], "definition": (item.get("definition") or [None])[0], "obsolete": item.get("obsolete", False), } ) return { "data": concepts, "metadata": { "source": "BioPortal (NCBO)", "ontology": ontology, "direction": direction, "total_count": total, "concept_id": concept_id, }, }