# Source code for tooluniverse.interpro_domain_arch_tool

# interpro_domain_arch_tool.py
"""
InterPro Domain Architecture tool for ToolUniverse.

Provides access to InterPro API endpoints for domain architecture analysis:
- Protein domain architecture with exact residue positions
- PDB structures containing a specific Pfam domain
- Pfam clan (superfamily) member families

API: https://www.ebi.ac.uk/interpro/api/
No authentication required. Free public access.
"""

from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool


# Root of the InterPro REST API; each query method appends its own path to it.
INTERPRO_BASE_URL = "https://www.ebi.ac.uk/interpro/api"


class InterProDomainArchTool(BaseTool):
    """
    Tool for InterPro domain architecture analysis via the InterPro API.

    No authentication required.

    The ``fields.endpoint`` value of the tool configuration selects one of
    three queries:

    - ``protein_domain_architecture``: Pfam domains of a UniProt protein,
      one record per fragment with its start/end position.
    - ``structures_for_domain``: PDB structures containing a Pfam domain.
    - ``clan_members``: member families of a Pfam clan.

    Every query returns either ``{"data": ..., "metadata": ...}`` on success
    or ``{"error": "<message>"}`` on failure.
    """

    # Upper bound applied to the caller-supplied ``max_results`` argument.
    _MAX_RESULTS_CAP = 200

    def __init__(self, tool_config: Dict[str, Any]):
        """Read the request timeout and the target endpoint from the config.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default 60)
                and ``fields.endpoint`` (default
                ``"protein_domain_architecture"``) are read here.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 60)
        # ``or {}`` tolerates a config that carries ``"fields": None``.
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "protein_domain_architecture")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the InterPro API call.

        Args:
            arguments: Query arguments; which keys are read depends on the
                configured endpoint (``accession``, ``pfam_accession`` or
                ``clan_accession``, plus optional ``max_results``).

        Returns:
            The result dict of the selected query, or ``{"error": ...}`` when
            the request times out, cannot connect, returns an HTTP error
            status, or any other exception is raised while querying.
        """
        try:
            return self._query(arguments)
        except requests.exceptions.Timeout:
            return {"error": f"InterPro API timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to InterPro API"}
        except requests.exceptions.HTTPError as e:
            code = e.response.status_code if e.response is not None else "unknown"
            if code == 404:
                # Report whichever identifier the caller actually supplied.
                param = (
                    arguments.get("accession")
                    or arguments.get("pfam_accession")
                    or arguments.get("clan_accession")
                    or ""
                )
                return {"error": f"Not found in InterPro: {param}"}
            # HTTP 204 is not handled here: it is a success status, so
            # raise_for_status() never raises for it. See _interpro_get_json.
            return {"error": f"InterPro API HTTP error: {code}"}
        except Exception as e:
            return {"error": f"Unexpected error querying InterPro API: {str(e)}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the query method matching ``self.endpoint``."""
        if self.endpoint == "protein_domain_architecture":
            return self._get_protein_domain_architecture(arguments)
        if self.endpoint == "structures_for_domain":
            return self._get_structures_for_domain(arguments)
        if self.endpoint == "clan_members":
            return self._get_clan_members(arguments)
        return {"error": f"Unknown endpoint: {self.endpoint}"}

    def _interpro_get_json(self, url: str, params: Dict[str, Any]) -> Any:
        """GET ``url`` and return the decoded JSON body.

        Returns ``None`` when the response is HTTP 204 or has an empty body,
        so callers can report "no results" instead of failing while decoding
        an empty payload.

        Raises:
            requests.exceptions.HTTPError: for 4xx/5xx responses.
            requests.exceptions.RequestException: for transport failures.
        """
        response = requests.get(url, params=params, timeout=self.timeout)
        response.raise_for_status()
        if response.status_code == 204 or not response.content:
            return None
        return response.json()

    @classmethod
    def _bounded_max_results(cls, raw: Any, default: int) -> int:
        """Coerce ``raw`` to an int clamped to ``1.._MAX_RESULTS_CAP``.

        ``None`` (argument omitted or explicitly null) yields ``default``.

        Raises:
            TypeError, ValueError, OverflowError: if ``raw`` cannot be
                converted to an integer.
        """
        if raw is None:
            return default
        # The lower bound matters: a negative value used as a slice end
        # would drop items from the tail instead of limiting the count.
        return max(1, min(int(raw), cls._MAX_RESULTS_CAP))

    @staticmethod
    def _display_name(name_info: Any) -> Any:
        """Return the display name from a ``metadata["name"]`` value.

        The value is handled both as a dict carrying a ``"name"`` key and as
        a plain value; a missing (``None``) value becomes an empty string
        rather than the literal text ``"None"``.
        """
        if isinstance(name_info, dict):
            return name_info.get("name", "")
        return "" if name_info is None else str(name_info)

    def _get_protein_domain_architecture(
        self, arguments: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Get Pfam domain architecture for a protein with exact positions.

        Args:
            arguments: Must contain ``accession`` (UniProt accession).

        Returns:
            ``{"data": {...}, "metadata": {...}}`` where ``data["domains"]``
            holds one record per fragment, sorted by start position; or an
            ``{"error": ...}`` dict when the accession is missing or the API
            returns no content.
        """
        accession = str(arguments.get("accession") or "").strip()
        if not accession:
            return {
                "error": "accession parameter is required (UniProt accession, e.g., 'P04637')"
            }

        # Percent-encode the caller-supplied value so it cannot alter the
        # request path.
        url = (
            f"{INTERPRO_BASE_URL}/entry/pfam/protein/uniprot/"
            f"{quote(accession, safe='')}"
        )
        data = self._interpro_get_json(url, {"page_size": 50, "format": "json"})
        if data is None:
            return {"error": "No results found"}

        domains = []
        protein_length = None
        for result in data.get("results") or []:
            metadata = result.get("metadata") or {}
            name = self._display_name(metadata.get("name"))
            for protein in result.get("proteins") or []:
                # The first protein record seen supplies the length.
                if protein_length is None:
                    protein_length = protein.get("protein_length")
                for loc in protein.get("entry_protein_locations") or []:
                    for frag in loc.get("fragments") or []:
                        domains.append(
                            {
                                "pfam_accession": metadata.get("accession", ""),
                                "name": name,
                                "type": metadata.get("type", ""),
                                "integrated_interpro": metadata.get("integrated"),
                                "start": frag.get("start"),
                                "end": frag.get("end"),
                                "score": loc.get("score"),
                            }
                        )

        # Sort domains by start position. "start" is always present in the
        # record, so a missing coordinate is stored as None and must be
        # mapped to 0 explicitly; dict.get's default would never apply.
        domains.sort(key=lambda d: d["start"] if d["start"] is not None else 0)

        return {
            "data": {
                "accession": accession,
                "protein_length": protein_length,
                "domain_count": len(domains),
                "domains": domains,
            },
            "metadata": {
                "source": "InterPro API (Pfam domain architecture)",
                "accession": accession,
            },
        }

    def _get_structures_for_domain(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Find PDB structures containing a specific Pfam domain.

        Args:
            arguments: Must contain ``pfam_accession``; optional
                ``max_results`` (default 20, clamped to 1..200) is sent as
                the page size.

        Returns:
            ``{"data": {...}, "metadata": {...}}`` with the reported total
            count and the structures of the first page; or an
            ``{"error": ...}`` dict for missing/invalid arguments or an
            empty API response.
        """
        pfam_acc = str(arguments.get("pfam_accession") or "").strip()
        if not pfam_acc:
            return {"error": "pfam_accession parameter is required (e.g., 'PF00870')"}

        try:
            max_results = self._bounded_max_results(arguments.get("max_results"), 20)
        except (TypeError, ValueError, OverflowError):
            return {"error": "max_results must be an integer"}

        url = f"{INTERPRO_BASE_URL}/structure/pdb/entry/pfam/{quote(pfam_acc, safe='')}"
        data = self._interpro_get_json(
            url, {"page_size": max_results, "format": "json"}
        )
        if data is None:
            return {"error": "No results found"}

        structures = []
        for result in data.get("results") or []:
            meta = result.get("metadata") or {}
            structures.append(
                {
                    "pdb_id": meta.get("accession", ""),
                    "name": meta.get("name"),
                    "experiment_type": meta.get("experiment_type"),
                    "resolution": meta.get("resolution"),
                }
            )

        return {
            "data": {
                "pfam_accession": pfam_acc,
                "total_structures": data.get("count", 0),
                "returned": len(structures),
                "structures": structures,
            },
            "metadata": {
                "source": "InterPro API (structures for domain)",
                "pfam_accession": pfam_acc,
            },
        }

    def _get_clan_members(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get member families in a Pfam clan (superfamily).

        Args:
            arguments: Must contain ``clan_accession``; optional
                ``max_results`` (default 50, clamped to 1..200) limits how
                many relationship nodes are returned.

        Returns:
            ``{"data": {...}, "metadata": {...}}`` with the clan name,
            description and up to ``max_results`` members sorted by
            accession; or an ``{"error": ...}`` dict for missing/invalid
            arguments or an empty API response.
        """
        clan_acc = str(arguments.get("clan_accession") or "").strip()
        if not clan_acc:
            return {"error": "clan_accession parameter is required (e.g., 'CL0016')"}

        try:
            max_results = self._bounded_max_results(arguments.get("max_results"), 50)
        except (TypeError, ValueError, OverflowError):
            return {"error": "max_results must be an integer"}

        # First get clan metadata
        url = f"{INTERPRO_BASE_URL}/set/pfam/{quote(clan_acc, safe='')}"
        clan_data = self._interpro_get_json(url, {"format": "json"})
        if clan_data is None:
            return {"error": "No results found"}

        metadata = clan_data.get("metadata") or {}
        clan_name = self._display_name(metadata.get("name"))

        # Get member families from relationships
        relationships = metadata.get("relationships") or {}
        nodes = relationships.get("nodes") or []
        members = [
            {
                "accession": node.get("accession", ""),
                "short_name": node.get("short_name"),
                "name": node.get("name"),
                "type": node.get("type"),
                "score": node.get("score"),
            }
            for node in nodes[:max_results]
        ]

        # Sort by accession; a null accession sorts as "" so that None is
        # never compared with a string.
        members.sort(key=lambda m: m["accession"] or "")

        return {
            "data": {
                "clan_accession": clan_acc,
                "clan_name": clan_name,
                "description": metadata.get("description"),
                "member_count": len(members),
                "members": members,
            },
            "metadata": {
                "source": "InterPro API (Pfam clan members)",
                "clan_accession": clan_acc,
            },
        }