Source code for tooluniverse.interpro_ext_tool

# interpro_ext_tool.py
"""
InterPro Extended API tool for ToolUniverse.

Provides access to InterPro API endpoints for querying proteins by domain,
complementing existing domain-centric search tools.

API: https://www.ebi.ac.uk/interpro/api/
No authentication required. Free public access.
"""

from typing import Dict, Any, Optional
from urllib.parse import quote

import requests

from .base_tool import BaseTool


# Root URL of the InterPro REST API; request paths are appended to this prefix.
INTERPRO_API_BASE_URL = "https://www.ebi.ac.uk/interpro/api"


class InterProExtTool(BaseTool):
    """
    Extended InterPro API tool for protein-by-domain queries.

    Complements existing InterPro tools (get_protein_domains, search_domains,
    get_domain_details) by providing reverse lookup: find all proteins
    containing a specific domain.

    No authentication required.

    Configuration read from ``tool_config``:
        timeout: Seconds to wait for the HTTP request (default 60).
        fields.endpoint: Name of the query to run. Only
            ``"proteins_by_domain"`` is implemented; it is also the default.
    """

    # Page size used when the caller does not supply one.
    _DEFAULT_PAGE_SIZE = 20
    # Upper bound enforced on caller-supplied page sizes.
    _MAX_PAGE_SIZE = 50

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout and the configured endpoint name."""
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 60)
        # ``fields`` may be present but null in a config file; treat as empty.
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "proteins_by_domain")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the InterPro API call.

        Args:
            arguments: Query arguments; see ``_get_proteins_by_domain`` for
                the keys that are read.

        Returns:
            The query result, or a dict with a single ``"error"`` key.
            Network and HTTP failures are converted to error dicts rather
            than raised.
        """
        arguments = arguments or {}
        try:
            return self._query(arguments)
        except requests.exceptions.Timeout:
            return {"error": f"InterPro API timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to InterPro API"}
        except requests.exceptions.HTTPError as e:
            code = e.response.status_code if e.response is not None else "unknown"
            if code == 404:
                return {
                    "error": f"Domain not found in InterPro: {arguments.get('domain_id', '')}"
                }
            return {"error": f"InterPro API HTTP error: {code}"}
        except Exception as e:
            # Last-resort boundary: callers expect an error dict, never a raise.
            return {"error": f"Unexpected error querying InterPro API: {str(e)}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to appropriate endpoint."""
        if self.endpoint == "proteins_by_domain":
            return self._get_proteins_by_domain(arguments)
        return {"error": f"Unknown endpoint: {self.endpoint}"}

    @staticmethod
    def _summarize_protein(result: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten one API result entry into the fields this tool reports."""
        # ``or {}`` also covers keys that are present but JSON null.
        meta = result.get("metadata") or {}
        organism = meta.get("source_organism") or {}
        return {
            "accession": meta.get("accession"),
            "name": meta.get("name"),
            "source_database": meta.get("source_database"),
            "length": meta.get("length"),
            "source_organism": organism.get("scientificName"),
            "tax_id": organism.get("taxId"),
        }

    def _get_proteins_by_domain(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get proteins containing a specific InterPro domain.

        Args:
            arguments: Dict with the keys
                ``domain_id`` (required): InterPro accession, e.g. IPR011615.
                ``page_size`` (optional): number of proteins to request;
                    defaults to 20 and is clamped to the range 1..50.
                ``reviewed_only`` (optional): when truthy, query the
                    ``reviewed`` protein set instead of ``uniprot``.

        Returns:
            A dict with ``data`` (domain id, total count, protein summaries,
            effective page size, reviewed flag) and ``metadata``, or a dict
            with an ``"error"`` key when the arguments are invalid.

        Raises:
            requests.exceptions.RequestException: on network or HTTP failure
                (converted to an error dict by ``run``).
        """
        domain_id = str(arguments.get("domain_id") or "").strip()
        if not domain_id:
            return {
                "error": "domain_id parameter is required (InterPro accession, e.g., IPR011615)"
            }

        raw_page_size = arguments.get("page_size")
        if raw_page_size is None:
            page_size = self._DEFAULT_PAGE_SIZE
        else:
            try:
                page_size = int(raw_page_size)
            except (TypeError, ValueError):
                return {
                    "error": (
                        "page_size must be an integer between 1 and "
                        f"{self._MAX_PAGE_SIZE}"
                    )
                }
        # Clamp on both sides: zero or negative sizes are meaningless.
        page_size = max(1, min(page_size, self._MAX_PAGE_SIZE))

        reviewed_only = arguments.get("reviewed_only", False)

        # Build URL for protein search by domain. The accession is
        # percent-encoded so characters such as "/" or "?" cannot alter
        # the request path.
        db = "reviewed" if reviewed_only else "uniprot"
        url = (
            f"{INTERPRO_API_BASE_URL}/protein/{db}/entry/interpro/"
            f"{quote(domain_id, safe='')}"
        )
        params = {"page_size": page_size}

        response = requests.get(url, params=params, timeout=self.timeout)
        response.raise_for_status()

        # NOTE(review): InterPro is understood to answer 204 No Content when
        # nothing matches; there is no JSON body to parse in that case.
        # Confirm against the API documentation.
        if response.status_code == 204:
            data = {}
        else:
            data = response.json() or {}

        total_count = data.get("count", 0)
        results = data.get("results") or []
        proteins = [self._summarize_protein(entry) for entry in results]

        return {
            "data": {
                "domain_id": domain_id,
                "total_proteins": total_count,
                "proteins": proteins,
                "page_size": page_size,
                "reviewed_only": reviewed_only,
            },
            "metadata": {
                "source": "InterPro API - Proteins by Domain",
                "domain_id": domain_id,
            },
        }