# Source code for tooluniverse.t3db_tool

"""
T3DB Tool - Toxin and Toxin-Target Database

Provides access to T3DB (www.t3db.ca) for toxin information including
chemical properties, targets, health effects, and mechanisms of toxicity.

API: https://www.t3db.ca/toxins/{id}.xml
No authentication required.

Reference: Wishart et al., Nucleic Acids Res. 2015
"""

import re
from html import unescape
from typing import Any, Dict, List, Optional

import requests
import xmltodict

from .base_tool import BaseTool
from .tool_registry import register_tool


# Root URL of the T3DB website; request paths such as "/toxins/{id}.xml"
# and "/unearth/q" are appended to it.
T3DB_BASE = "https://www.t3db.ca"


@register_tool("T3DBTool")
class T3DBTool(BaseTool):
    """
    Tool for querying the Toxin and Toxin-Target Database (T3DB).

    The operation performed by :meth:`run` is selected by the
    ``fields.endpoint_type`` entry of the tool configuration.

    Supported operations:
    - get_toxin: Get detailed toxin info by T3DB ID
    - search_toxins: Search toxins by name
    """

    # Free-text fields (description, mechanism, health effects) are cut to
    # this many characters to keep responses compact.
    MAX_TEXT_LENGTH = 500
    # Maximum number of hits returned by a search.
    MAX_SEARCH_RESULTS = 10

    # A normalized toxin ID: the "T3D" prefix followed by ASCII digits only.
    _TOXIN_ID_RE = re.compile(r"T3D[0-9]+")
    # "Protein Name (UniProt_ID)" pairs embedded in free text.
    _TARGET_RE = re.compile(r"([^()\n]+?)\s*\(([A-Z][A-Z0-9]{4}[0-9])\)")
    # Patterns used to scrape the HTML search-result page.
    _RESULT_ID_RE = re.compile(r'href="/toxins/(T3D\d+)"')
    _NAME_CELL_RE = re.compile(r'<td class="name"[^>]*>([^<]+)</td>')
    _RESULT_LINK_RE = re.compile(r'href="/toxins/(T3D\d+)"[^>]*>([^<]+)<')

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Set up the HTTP session and read the configured operation.

        Args:
            tool_config: Tool configuration. ``tool_config["fields"]
                ["endpoint_type"]`` selects the operation and defaults to
                ``"get_toxin"`` when absent.
        """
        super().__init__(tool_config)
        self.timeout = 30
        # ``or {}`` also covers an explicit ``"fields": None`` entry.
        fields = tool_config.get("fields") or {}
        self.endpoint_type = fields.get("endpoint_type", "get_toxin")
        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "ToolUniverse/1.0", "Accept": "application/xml"}
        )

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the configured operation and never raise.

        Args:
            arguments: Operation arguments; ``None`` is treated as empty.

        Returns:
            A dict with ``"status": "success"`` plus ``data``/``metadata``, or
            ``"status": "error"`` plus an ``error`` message. Timeouts,
            connection failures and any other exception are converted into
            error results.
        """
        handlers = {
            "get_toxin": self._get_toxin,
            "search_toxins": self._search_toxins,
        }
        handler = handlers.get(self.endpoint_type)
        if handler is None:
            return {
                "status": "error",
                "error": f"Unknown endpoint: {self.endpoint_type}",
            }
        try:
            return handler(arguments or {})
        except requests.exceptions.Timeout:
            return {"status": "error", "error": "T3DB API request timed out"}
        except requests.exceptions.ConnectionError:
            return {"status": "error", "error": "Failed to connect to T3DB"}
        except Exception as e:
            # Top-level boundary: report every other failure as an error
            # result instead of propagating it to the caller.
            return {"status": "error", "error": f"T3DB error: {str(e)}"}

    @classmethod
    def _normalize_toxin_id(cls, raw_id: Any) -> Optional[str]:
        """
        Convert user input into a canonical toxin ID.

        Input is stringified, stripped and upper-cased. Values without the
        ``T3D`` prefix are zero-padded to four characters and prefixed
        (``1`` -> ``T3D0001``).

        Returns:
            The normalized ID, or ``None`` when the result is not ``T3D``
            followed only by digits.
        """
        text = str(raw_id).strip().upper()
        if not text.startswith("T3D"):
            text = f"T3D{text.zfill(4)}"
        return text if cls._TOXIN_ID_RE.fullmatch(text) else None

    @staticmethod
    def _node_text(node: Any) -> str:
        """
        Return the text of a parsed XML node, or ``""`` when it has none.

        xmltodict yields a plain string for a text-only element, but a dict
        (text under ``"#text"``) when the element also carries attributes;
        slicing such a dict directly would raise.
        """
        if isinstance(node, dict):
            node = node.get("#text")
        return node if isinstance(node, str) else ""

    @classmethod
    def _parse_targets(cls, node: Any) -> List[Dict[str, Any]]:
        """
        Extract ``{"name", "uniprot_id"}`` entries from a ``target`` node.

        Handles the shapes xmltodict can produce: a string (scanned for
        ``"Protein Name (UniProt_ID)"`` pairs), a dict (read via its ``name``
        and ``uniprot-id`` keys), or a list of either when the element is
        repeated. Dict nodes with neither key set are skipped.
        """
        if isinstance(node, list):
            return [entry for item in node for entry in cls._parse_targets(item)]
        if isinstance(node, str):
            return [
                {"name": name.strip(), "uniprot_id": accession}
                for name, accession in cls._TARGET_RE.findall(node)
            ]
        if isinstance(node, dict):
            name = node.get("name")
            accession = node.get("uniprot-id")
            if name is None and accession is None:
                return []
            return [{"name": name, "uniprot_id": accession}]
        return []

    def _get_toxin(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Fetch one toxin record from ``/toxins/{id}.xml``.

        Args:
            arguments: Must contain ``toxin_id`` (or ``id``). Strings and
                integers are accepted; see :meth:`_normalize_toxin_id`.

        Returns:
            A success dict with the toxin's fields, or an error dict when the
            ID is missing, malformed, or not found (HTTP 404).

        Raises:
            requests.exceptions.RequestException: On transport failures or
                other non-2xx responses; :meth:`run` converts these.
        """
        raw_id = arguments.get("toxin_id") or arguments.get("id", "")
        id_text = str(raw_id).strip() if raw_id else ""
        if not id_text:
            return {
                "status": "error",
                "error": "toxin_id is required (e.g., 'T3D0001')",
            }

        # Validate before building the URL so arbitrary text never ends up in
        # the request path.
        toxin_id = self._normalize_toxin_id(id_text)
        if toxin_id is None:
            return {
                "status": "error",
                "error": (
                    f"Invalid toxin_id {id_text!r}; "
                    "expected a T3DB ID such as 'T3D0001'"
                ),
            }

        resp = self.session.get(
            f"{T3DB_BASE}/toxins/{toxin_id}.xml", timeout=self.timeout
        )
        if resp.status_code == 404:
            return {"status": "error", "error": f"Toxin {toxin_id} not found"}
        resp.raise_for_status()

        data = xmltodict.parse(resp.text)
        compound = data.get("compound")
        if not isinstance(compound, dict):
            compound = {}

        limit = self.MAX_TEXT_LENGTH
        return {
            "status": "success",
            "data": {
                "id": toxin_id,
                "name": compound.get("common-name"),
                "description": self._node_text(compound.get("description"))[:limit],
                "cas": compound.get("cas"),
                "pubchem_id": compound.get("pubchem-id"),
                "formula": compound.get("chemical-formula"),
                "weight": compound.get("weight"),
                "route_of_exposure": compound.get("route-of-exposure"),
                "mechanism_of_toxicity": self._node_text(
                    compound.get("mechanism-of-toxicity")
                )[:limit],
                "health_effects": self._node_text(
                    compound.get("health-effects")
                )[:limit],
                "targets": self._parse_targets(compound.get("target")),
            },
            "metadata": {"source": "T3DB", "toxin_id": toxin_id},
        }

    @classmethod
    def _parse_search_results(cls, page: str) -> List[Dict[str, Any]]:
        """
        Scrape toxin IDs and names from a search-result HTML page.

        IDs are de-duplicated in document order so a toxin linked more than
        once is reported once. Names are taken positionally from
        ``<td class="name">`` cells; when no cell is available for a hit, the
        text of a link to that toxin is used instead (ignoring link text that
        merely repeats the ID).

        Returns:
            At most ``MAX_SEARCH_RESULTS`` dicts of the form
            ``{"id": ..., "name": ... or None}``.
        """
        toxin_ids = list(dict.fromkeys(cls._RESULT_ID_RE.findall(page)))
        cell_names = [
            unescape(cell).strip() for cell in cls._NAME_CELL_RE.findall(page)
        ]

        link_names: Dict[str, str] = {}
        for toxin_id, label in cls._RESULT_LINK_RE.findall(page):
            label = unescape(label).strip()
            if label and label != toxin_id:
                link_names.setdefault(toxin_id, label)

        results = []
        for index, toxin_id in enumerate(toxin_ids[: cls.MAX_SEARCH_RESULTS]):
            cell_name = cell_names[index] if index < len(cell_names) else None
            results.append(
                {"id": toxin_id, "name": cell_name or link_names.get(toxin_id)}
            )
        return results

    def _search_toxins(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search toxins by name via the site's HTML search page.

        Args:
            arguments: Must contain ``query`` (or ``name``).

        Returns:
            A success dict whose ``data`` is a list of ``{"id", "name"}``
            hits (possibly empty), or an error dict when the query is missing
            or the search page does not answer with HTTP 200.

        Raises:
            requests.exceptions.RequestException: On transport failures;
                :meth:`run` converts these.
        """
        raw_query = arguments.get("query") or arguments.get("name", "")
        query = str(raw_query).strip() if raw_query else ""
        if not query:
            return {"status": "error", "error": "query is required"}

        # T3DB doesn't have a search API — use the "unearth" search page.
        # The page is scraped as HTML, so override the session-wide XML
        # Accept header for this request.
        resp = self.session.get(
            f"{T3DB_BASE}/unearth/q",
            params={"query": query, "searcher": "toxins", "button": ""},
            headers={"Accept": "text/html"},
            timeout=self.timeout,
        )
        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"T3DB search returned HTTP {resp.status_code}. "
                "Try searching by T3DB ID directly (e.g., T3D0001).",
            }

        results = self._parse_search_results(resp.text)
        return {
            "status": "success",
            "data": results,
            "metadata": {
                "query": query,
                "returned": len(results),
                "source": "T3DB",
                "note": "Use T3DB_get_toxin with the ID for detailed info",
            },
        }