Source code for tooluniverse.identifiers_org_tool

"""
Identifiers.org API tool for ToolUniverse.

Identifiers.org is an ELIXIR service providing persistent, resolvable identifiers
for life science data. It supports 800+ registered namespaces (databases).

API: https://resolver.api.identifiers.org and https://registry.api.identifiers.org
No authentication required. Public access.
"""

import requests
from typing import Any

from .base_rest_tool import BaseRESTTool
from .tool_registry import register_tool

# Base URL of the resolver service; a compact identifier is appended as the URL path.
RESOLVER_BASE = "https://resolver.api.identifiers.org"
# Base URL of the registry service; the namespace endpoints used here live under /restApi.
REGISTRY_BASE = "https://registry.api.identifiers.org"


@register_tool("IdentifiersOrgTool")
class IdentifiersOrgTool(BaseRESTTool):
    """
    Tool for Identifiers.org - biological identifier resolution service.

    Resolves compact identifiers (e.g., 'uniprot:P04637') to resource URLs
    and searches the namespace registry. No authentication required.

    The operation performed by :meth:`run` is fixed per instance and is read
    from ``tool_config["fields"]["operation"]``. Supported values are
    ``"resolve"``, ``"get_namespace"``, ``"search_namespaces"`` and
    ``"list_namespaces"``.
    """

    # Maps each supported operation name to the method that implements it.
    _OPERATIONS = {
        "resolve": "_resolve",
        "get_namespace": "_get_namespace",
        "search_namespaces": "_search_namespaces",
        "list_namespaces": "_list_namespaces",
    }

    def __init__(self, tool_config: dict):
        """Store the per-request timeout and the configured operation.

        Args:
            tool_config: Tool configuration. ``fields.operation`` selects the
                operation; it defaults to ``"resolve"`` when absent.
        """
        super().__init__(tool_config)
        self.timeout = 20
        self.operation = tool_config.get("fields", {}).get("operation", "resolve")

    def run(self, arguments: dict) -> dict:
        """Execute the Identifiers.org API call.

        Args:
            arguments: Operation-specific arguments (see the ``_resolve``,
                ``_get_namespace``, ``_search_namespaces`` and
                ``_list_namespaces`` methods). ``None`` is treated as ``{}``.

        Returns:
            The operation's result dict on success, or ``{"error": <message>}``
            on any failure. This method never raises.
        """
        try:
            return self._query(arguments or {})
        except requests.exceptions.Timeout:
            return {"error": f"Identifiers.org request timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to Identifiers.org."}
        except requests.exceptions.HTTPError as e:
            # HTTPError may be constructed without a response attached, so
            # do not assume ``e.response`` is set.
            status = getattr(e.response, "status_code", "unknown")
            return {"error": f"Identifiers.org HTTP error: {status}"}
        except Exception as e:
            return {"error": f"Unexpected error: {str(e)}"}

    def _query(self, arguments: dict) -> dict:
        """Dispatch to the handler for ``self.operation``.

        Returns an ``{"error": ...}`` dict when the operation is not supported.
        """
        op = self.operation
        handler_name = self._OPERATIONS.get(op)
        if handler_name is None:
            return {"error": f"Unknown operation: {op}"}
        return getattr(self, handler_name)(arguments)

    @staticmethod
    def _clean_str(arguments: dict, key: str) -> str:
        """Return ``arguments[key]`` as a stripped string.

        Missing keys and explicit ``None`` both yield ``""`` so that callers
        can report a "parameter is required" error instead of crashing on
        ``None.strip()``.
        """
        value = arguments.get(key)
        if value is None:
            return ""
        return str(value).strip()

    @staticmethod
    def _page_params(
        arguments: dict, default_size: int, max_size: int
    ) -> dict[str, Any]:
        """Build the ``page``/``size`` query parameters.

        ``page`` defaults to 0. ``size`` defaults to ``default_size`` when it
        is missing or ``None`` and is clamped into ``[1, max_size]``.

        Raises:
            ValueError: If ``size`` cannot be converted to an integer.
            TypeError: If ``size`` has a type ``int()`` does not accept.
        """
        page = arguments.get("page")
        size = arguments.get("size")
        size = default_size if size is None else int(size)
        return {
            "page": 0 if page is None else page,
            "size": max(1, min(size, max_size)),
        }

    def _get_json(self, url: str, params: Any = None) -> Any:
        """GET ``url`` and return the decoded JSON body.

        Raises:
            requests.exceptions.HTTPError: On a 4xx/5xx response.
            requests.exceptions.RequestException: On transport failures.
        """
        resp = requests.get(url, params=params, timeout=self.timeout)
        resp.raise_for_status()
        return resp.json()

    def _resolve(self, arguments: dict) -> dict:
        """Resolve a compact identifier to resource URLs.

        Args:
            arguments: Must contain ``compact_id`` (e.g. ``'uniprot:P04637'``).

        Returns:
            ``{"data": {...}, "metadata": {...}}`` where ``data`` holds the
            parsed namespace, local id and the list of resolved resources, or
            ``{"error": ...}`` when ``compact_id`` is missing or blank.
        """
        compact_id = self._clean_str(arguments, "compact_id")
        if not compact_id:
            return {
                "error": "compact_id parameter is required (e.g., 'uniprot:P04637')"
            }

        # Use the compact_id directly in the URL path (no URL encoding of colon)
        data = self._get_json(f"{RESOLVER_BASE}/{compact_id}")

        # ``or {}`` / ``or []`` rather than a .get() default: the default is
        # only used for a missing key, not for a key whose JSON value is null.
        payload = data.get("payload") or {}
        parsed = payload.get("parsedCompactIdentifier") or {}
        resources = payload.get("resolvedResources") or []

        resolved = [
            {
                "provider_code": r.get("providerCode"),
                "resolved_url": r.get("compactIdentifierResolvedUrl"),
                "description": r.get("description"),
                "official": r.get("official"),
                "home_url": r.get("resourceHomeUrl"),
                "institution": (r.get("institution") or {}).get("name"),
            }
            for r in resources
        ]

        return {
            "data": {
                "compact_id": compact_id,
                "namespace": parsed.get("namespace"),
                "local_id": parsed.get("localId"),
                "resolved_resources": resolved,
                "resource_count": len(resolved),
            },
            "metadata": {
                "source": "Identifiers.org",
                "error_message": data.get("errorMessage"),
            },
        }

    def _get_namespace(self, arguments: dict) -> dict:
        """Get namespace details by prefix.

        Args:
            arguments: Must contain ``prefix`` (e.g. ``'uniprot'``, ``'pdb'``).

        Returns:
            ``{"data": <registry response as returned>, "metadata": {...}}``,
            or ``{"error": ...}`` when ``prefix`` is missing or blank.
        """
        prefix = self._clean_str(arguments, "prefix")
        if not prefix:
            return {"error": "prefix parameter is required (e.g., 'uniprot', 'pdb')"}

        url = f"{REGISTRY_BASE}/restApi/namespaces/search/findByPrefix"
        data = self._get_json(url, params={"prefix": prefix})

        return {
            "data": data,
            "metadata": {
                "prefix": prefix,
                "source": "Identifiers.org Registry",
            },
        }

    def _search_namespaces(self, arguments: dict) -> dict:
        """Search namespaces by keyword.

        Args:
            arguments: Must contain ``content`` (the search term). Optional
                ``page`` (default 0) and ``size`` (default 10, capped at 50).

        Returns:
            ``{"data": {...}, "metadata": {...}}`` with the matching
            namespaces and paging totals, or ``{"error": ...}`` when
            ``content`` is missing or blank.
        """
        content = self._clean_str(arguments, "content")
        if not content:
            return {"error": "content parameter is required"}

        params: dict[str, Any] = {
            "content": content,
            **self._page_params(arguments, default_size=10, max_size=50),
        }
        url = f"{REGISTRY_BASE}/restApi/namespaces/search/findByPrefixContaining"
        data = self._get_json(url, params=params)

        namespaces = (data.get("_embedded") or {}).get("namespaces") or []
        page_info = data.get("page") or {}

        ns_list = [
            {
                "prefix": ns.get("prefix"),
                "name": ns.get("name"),
                "pattern": ns.get("pattern"),
                "description": ns.get("description"),
                "sample_id": ns.get("sampleId"),
                "deprecated": ns.get("deprecated"),
            }
            for ns in namespaces
        ]

        return {
            "data": {
                "namespaces": ns_list,
                "total_elements": page_info.get("totalElements"),
                "total_pages": page_info.get("totalPages"),
            },
            "metadata": {
                "search_term": content,
                "source": "Identifiers.org Registry",
            },
        }

    def _list_namespaces(self, arguments: dict) -> dict:
        """List all registered namespaces with pagination.

        Args:
            arguments: Optional ``page`` (default 0) and ``size`` (default 20,
                capped at 100).

        Returns:
            ``{"data": {...}, "metadata": {...}}`` with one page of
            namespaces, the paging totals and the current page number.
        """
        params = self._page_params(arguments, default_size=20, max_size=100)
        data = self._get_json(f"{REGISTRY_BASE}/restApi/namespaces", params=params)

        namespaces = (data.get("_embedded") or {}).get("namespaces") or []
        page_info = data.get("page") or {}

        ns_list = [
            {
                "prefix": ns.get("prefix"),
                "name": ns.get("name"),
                "pattern": ns.get("pattern"),
                "sample_id": ns.get("sampleId"),
            }
            for ns in namespaces
        ]

        return {
            "data": {
                "namespaces": ns_list,
                "total_elements": page_info.get("totalElements"),
                "total_pages": page_info.get("totalPages"),
                "current_page": page_info.get("number"),
            },
            "metadata": {
                "source": "Identifiers.org Registry",
            },
        }