Source code for tooluniverse.disprot_tool

# disprot_tool.py
"""
DisProt tool for ToolUniverse.

DisProt is a manually curated database of intrinsically disordered proteins
and regions, providing experimentally validated disorder annotations with
evidence codes and literature references.

API: https://disprot.org/api/
No authentication required.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

DISPROT_BASE_URL = "https://disprot.org/api"


[docs] @register_tool("DisProtTool") class DisProtTool(BaseTool): """ Tool for querying DisProt intrinsically disordered protein database. Supports: - Search disordered proteins by text query - Get detailed disorder region annotations for a specific protein No authentication required. """
def __init__(self, tool_config: Dict[str, Any]):
    """Set up the tool from its configuration dictionary.

    Args:
        tool_config: Tool configuration. Two entries are read here:
            ``"timeout"`` (request timeout in seconds, default ``60``) and
            ``"fields"``, a mapping whose ``"endpoint"`` entry names the
            operation this instance performs (default ``"search"``).
    """
    super().__init__(tool_config)
    self.timeout = tool_config.get("timeout", 60)
    # The endpoint name is nested one level down, under the "fields" mapping.
    self.endpoint = tool_config.get("fields", {}).get("endpoint", "search")
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run the configured DisProt query and never let an exception escape.

    Args:
        arguments: Tool arguments, forwarded unchanged to ``self._query``.

    Returns:
        Whatever ``self._query`` returns on success. On failure, a dictionary
        with a single ``"error"`` key describing a timeout, a connection
        failure, an HTTP error status (404 gets a dedicated message) or any
        other exception.
    """
    try:
        outcome = self._query(arguments)
    except requests.exceptions.Timeout:
        return {"error": f"DisProt API timed out after {self.timeout}s."}
    except requests.exceptions.ConnectionError:
        return {"error": "Failed to connect to DisProt API (disprot.org)."}
    except requests.exceptions.HTTPError as http_err:
        # An HTTPError may carry no response object, so test identity
        # against None rather than truthiness.
        reply = http_err.response
        status = "unknown" if reply is None else reply.status_code
        message = (
            "Entry not found in DisProt. Check the accession."
            if status == 404
            else f"DisProt API HTTP {status}"
        )
        return {"error": message}
    except Exception as exc:
        # Tool boundary: report anything unforeseen as data, not a traceback.
        return {"error": f"Unexpected error: {str(exc)}"}
    return outcome
def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Dispatch ``arguments`` to the handler selected by ``self.endpoint``.

    ``"search"`` is handled by ``self._search`` and ``"get_entry"`` by
    ``self._get_entry``. Any other endpoint value produces an
    ``{"error": ...}`` dictionary instead of raising.
    """
    endpoint = self.endpoint
    if endpoint not in ("search", "get_entry"):
        return {"error": f"Unknown endpoint: {self.endpoint}"}
    # Only the selected handler attribute is looked up.
    handler = self._search if endpoint == "search" else self._get_entry
    return handler(arguments)
def _get_entry(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Fetch one DisProt entry and summarise its disorder regions.

    The entry is looked up through the ``/search`` endpoint with
    ``page_size=1``. An identifier starting with "DP" (case-insensitive) is
    upper-cased and sent as ``disprot_id``; anything else is sent as ``acc``.
    The first row of the response's ``"data"`` list is used.

    Args:
        arguments: Tool arguments. ``"accession"`` is required; surrounding
            whitespace is ignored and non-string values are converted to text.

    Returns:
        On success, ``{"data": {...}, "metadata": {...}}``. ``data["regions"]``
        holds at most 30 regions, each with at most 5 cross references.
        ``metadata["total_regions"]`` is the entry's ``regions_counter`` when
        present, otherwise the number of regions in the response.
        ``{"error": ...}`` is returned when the accession is missing or blank,
        or when the search returns no rows.

    Raises:
        requests.exceptions.RequestException: Network failures and HTTP error
            statuses (via ``raise_for_status``) propagate to the caller.
    """
    # Coerce and trim first so None, "", whitespace-only and non-string
    # values are all handled before any network traffic happens.
    accession = str(arguments.get("accession") or "").strip()
    if not accession:
        return {
            "error": "accession is required. Use DisProt ID (e.g., 'DP00086') or UniProt accession (e.g., 'P04637')."
        }

    # Determine whether it's a DisProt ID (DP*) or UniProt accession
    if accession.upper().startswith("DP"):
        params = {"disprot_id": accession.upper(), "page_size": 1}
    else:
        params = {"acc": accession, "page_size": 1}

    response = requests.get(
        f"{DISPROT_BASE_URL}/search", params=params, timeout=self.timeout
    )
    response.raise_for_status()

    # `or []` (here and below) also covers keys present with a JSON null.
    results = response.json().get("data") or []
    if not results:
        return {"error": f"Entry not found in DisProt for accession '{accession}'."}
    data = results[0]

    all_regions = data.get("regions") or []
    # Truncate before formatting so only the returned items are processed.
    regions = [
        {
            "region_id": region.get("region_id"),
            "start": region.get("start"),
            "end": region.get("end"),
            "term_name": region.get("term_name"),
            "term_namespace": region.get("term_namespace"),
            "evidence_code": region.get("ec_name"),
            "method": region.get("method_name"),
            "cross_refs": [
                {"db": xr.get("db"), "id": xr.get("id")}
                for xr in (region.get("cross_refs") or [])[:5]  # Limit refs
            ],
        }
        for region in all_regions[:30]  # Limit to 30 regions
    ]

    # The gene name is read from the first gene's "name" mapping; tolerate a
    # missing gene list or a "name" that is not a mapping.
    genes = data.get("genes") or []
    name_field = genes[0].get("name") if genes else None
    gene_name = name_field.get("value", "") if isinstance(name_field, dict) else ""

    # Fall back to the observed region count when the counter is absent or null.
    regions_counter = data.get("regions_counter")
    total_regions = regions_counter if regions_counter is not None else len(all_regions)

    return {
        "data": {
            "disprot_id": data.get("disprot_id"),
            "acc": data.get("acc"),
            "name": data.get("name"),
            "gene": gene_name,
            "organism": data.get("organism"),
            "length": data.get("length"),
            "disorder_content": data.get("disorder_content"),
            "regions_counter": regions_counter,
            "dataset": data.get("dataset", []),
            "regions": regions,
        },
        "metadata": {
            "source": "DisProt (disprot.org)",
            "total_regions": total_regions,
        },
    }