# Source code for tooluniverse.t3db_tool
"""
T3DB Tool - Toxin and Toxin-Target Database
Provides access to T3DB (www.t3db.ca) for toxin information including
chemical properties, targets, health effects, and mechanisms of toxicity.
API: https://www.t3db.ca/toxins/{id}.xml
No authentication required.
Reference: Wishart et al., Nucleic Acids Res. 2015
"""
import html
import re
from typing import Dict, Any

import requests
import xmltodict

from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL of the T3DB website; request paths are appended to it.
T3DB_BASE = "https://www.t3db.ca"
# [docs]
@register_tool("T3DBTool")
class T3DBTool(BaseTool):
    """
    Tool for querying the Toxin and Toxin-Target Database (T3DB).

    Supported operations:
    - get_toxin: Get detailed toxin info by T3DB ID
    - search_toxins: Search toxins by name

    The operation is selected once, at construction time, from
    ``tool_config["fields"]["endpoint_type"]`` (default ``"get_toxin"``).
    Every operation returns a dict whose ``status`` is either ``"success"``
    (with ``data`` and ``metadata``) or ``"error"`` (with an ``error``
    message); ``run`` does not raise for request or parsing failures.
    """

    # Longest excerpt kept from the free-text fields of a toxin record.
    _MAX_TEXT_CHARS = 500
    # Upper bound on the number of hits returned by a search.
    _MAX_SEARCH_RESULTS = 10

    # A complete T3DB identifier: the "T3D" prefix followed by digits only.
    _TOXIN_ID_RE = re.compile(r"T3D[0-9]+")
    # "Protein Name (ACCESSION)" pairs. The accession part accepts both the
    # 6-character and the 10-character UniProt accession layouts.
    _TARGET_RE = re.compile(
        r"([^()\n]+?)\s*\(([A-Z][A-Z0-9]{4}[0-9](?:[A-Z][A-Z0-9]{2}[0-9])?)\)"
    )
    # Patterns used to scrape the HTML search-result page.
    _TOXIN_LINK_RE = re.compile(r'href="/toxins/(T3D\d+)"')
    _NAME_CELL_RE = re.compile(r'<td class="name"[^>]*>([^<]+)</td>')
    _LINK_TEXT_RE = re.compile(r'href="/toxins/(T3D\d+)"[^>]*>([^<]+)<')

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Configure the tool and open a reusable HTTP session.

        Args:
            tool_config: Tool configuration. ``fields.endpoint_type`` picks
                the operation (``"get_toxin"`` or ``"search_toxins"``).
        """
        super().__init__(tool_config)
        self.timeout = 30  # passed as ``timeout`` to every request
        # ``fields`` may be absent or explicitly None; both mean "defaults".
        fields = tool_config.get("fields") or {}
        self.endpoint_type = fields.get("endpoint_type", "get_toxin")
        self.session = requests.Session()
        self.session.headers.update(
            {"User-Agent": "ToolUniverse/1.0", "Accept": "application/xml"}
        )

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the configured operation and return its result dict.

        Args:
            arguments: Operation arguments (see ``_get_toxin`` and
                ``_search_toxins``). ``None`` is treated as no arguments.

        Returns:
            The operation's result, or ``{"status": "error", "error": ...}``
            for an unknown endpoint type or any exception raised while
            serving the request.
        """
        # Tolerate a missing mapping so validation can name the required
        # field instead of failing on ``None.get``.
        arguments = arguments or {}
        try:
            if self.endpoint_type == "get_toxin":
                return self._get_toxin(arguments)
            if self.endpoint_type == "search_toxins":
                return self._search_toxins(arguments)
            return {
                "status": "error",
                "error": f"Unknown endpoint: {self.endpoint_type}",
            }
        except requests.exceptions.Timeout:
            return {"status": "error", "error": "T3DB API request timed out"}
        except requests.exceptions.ConnectionError:
            return {"status": "error", "error": "Failed to connect to T3DB"}
        except Exception as e:
            # Tool boundary: report every other failure as an error result.
            return {"status": "error", "error": f"T3DB error: {str(e)}"}

    @classmethod
    def _normalize_toxin_id(cls, raw_id: Any):
        """
        Turn a user-supplied identifier into canonical ``T3D<digits>`` form.

        Accepts integers, bare numbers (zero-padded to four digits) and IDs
        whose prefix is in any letter case. Returns ``None`` when the result
        is not ``T3D`` followed only by digits, so that nothing else is ever
        interpolated into the request path.
        """
        text = str(raw_id).strip()
        if text[:3].upper() == "T3D":
            candidate = "T3D" + text[3:]
        else:
            candidate = "T3D" + text.zfill(4)
        return candidate if cls._TOXIN_ID_RE.fullmatch(candidate) else None

    @staticmethod
    def _as_text(value: Any) -> str:
        """
        Return the text of a value produced by ``xmltodict.parse``.

        xmltodict yields a plain string for a text-only element, ``None`` for
        an empty one, and a dict (attributes under ``@...`` keys, text under
        ``#text``) for an element that carries attributes. All three are
        reduced to a string here so the result can be sliced safely.
        """
        if isinstance(value, dict):
            value = value.get("#text")
        if value is None:
            return ""
        return value if isinstance(value, str) else str(value)

    @classmethod
    def _parse_targets(cls, raw: Any) -> list:
        """
        Build the ``targets`` list from the parsed ``target`` element(s).

        Handles a single value as well as a list, which is what xmltodict
        returns when an element is repeated. String items are scanned for
        "Protein Name (UniProt accession)" pairs; dict items contribute their
        ``name`` and ``uniprot-id`` entries.
        """
        if raw is None:
            return []
        items = raw if isinstance(raw, list) else [raw]
        targets = []
        for item in items:
            if isinstance(item, str):
                for name, uniprot in cls._TARGET_RE.findall(item):
                    # Drop separators left over from comma/semicolon lists.
                    targets.append(
                        {"name": name.strip(" \t,;"), "uniprot_id": uniprot}
                    )
            elif isinstance(item, dict):
                targets.append(
                    {
                        "name": item.get("name"),
                        "uniprot_id": item.get("uniprot-id"),
                    }
                )
        return targets

    @staticmethod
    def _clean_html_text(fragment: str) -> str:
        """Decode HTML entities in *fragment* and trim surrounding whitespace."""
        return html.unescape(fragment).strip()

    def _get_toxin(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Fetch one toxin record from ``/toxins/{id}.xml`` and summarize it.

        Args:
            arguments: Must contain ``toxin_id`` (or ``id``), e.g.
                ``"T3D0001"``, ``"t3d0001"``, ``"1"`` or ``1``.

        Returns:
            A success dict with the summarized record, or an error dict when
            the ID is missing, malformed, or not found (HTTP 404).

        Raises:
            requests.exceptions.RequestException: For transport failures and
                non-404 HTTP errors; ``run`` converts these to error dicts.
        """
        raw_id = arguments.get("toxin_id") or arguments.get("id", "")
        if not raw_id or not str(raw_id).strip():
            return {
                "status": "error",
                "error": "toxin_id is required (e.g., 'T3D0001')",
            }
        toxin_id = self._normalize_toxin_id(raw_id)
        if toxin_id is None:
            return {
                "status": "error",
                "error": f"Invalid toxin_id {raw_id!r}; expected a T3DB ID "
                "such as 'T3D0001'",
            }

        resp = self.session.get(
            f"{T3DB_BASE}/toxins/{toxin_id}.xml", timeout=self.timeout
        )
        if resp.status_code == 404:
            return {"status": "error", "error": f"Toxin {toxin_id} not found"}
        resp.raise_for_status()

        # NOTE(review): the element names read below (compound, common-name,
        # target, ...) are taken as given; confirm against a live record.
        data = xmltodict.parse(resp.text)
        compound = data.get("compound", {})
        if not isinstance(compound, dict):
            # An empty or text-only root element has no fields to read.
            return {
                "status": "error",
                "error": f"Unexpected T3DB response for toxin {toxin_id}",
            }

        limit = self._MAX_TEXT_CHARS
        return {
            "status": "success",
            "data": {
                "id": toxin_id,
                "name": compound.get("common-name"),
                "description": self._as_text(compound.get("description"))[:limit],
                "cas": compound.get("cas"),
                "pubchem_id": compound.get("pubchem-id"),
                "formula": compound.get("chemical-formula"),
                "weight": compound.get("weight"),
                "route_of_exposure": compound.get("route-of-exposure"),
                "mechanism_of_toxicity": self._as_text(
                    compound.get("mechanism-of-toxicity")
                )[:limit],
                "health_effects": self._as_text(
                    compound.get("health-effects")
                )[:limit],
                "targets": self._parse_targets(compound.get("target")),
            },
            "metadata": {"source": "T3DB", "toxin_id": toxin_id},
        }

    def _search_toxins(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search toxins by name by scraping the T3DB HTML search page.

        Args:
            arguments: Must contain ``query`` (or ``name``).

        Returns:
            A success dict whose ``data`` is a list of at most ten
            ``{"id", "name"}`` entries (possibly empty), or an error dict
            when the query is missing or the page does not return HTTP 200.

        Raises:
            requests.exceptions.RequestException: For transport failures;
                ``run`` converts these to error dicts.
        """
        query = arguments.get("query") or arguments.get("name", "")
        if not query:
            return {"status": "error", "error": "query is required"}

        # No XML/JSON search endpoint is used here; the HTML results page
        # served under /unearth/q is requested and scraped instead.
        resp = self.session.get(
            f"{T3DB_BASE}/unearth/q",
            params={"query": query, "searcher": "toxins", "button": ""},
            timeout=self.timeout,
        )
        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"T3DB search returned HTTP {resp.status_code}. "
                "Try searching by T3DB ID directly (e.g., T3D0001).",
            }

        page = resp.text
        # A result may link to the same toxin more than once; keep the first
        # occurrence of each ID so that IDs and name cells stay aligned.
        toxin_ids = list(dict.fromkeys(self._TOXIN_LINK_RE.findall(page)))
        toxin_ids = toxin_ids[: self._MAX_SEARCH_RESULTS]
        cell_names = [
            self._clean_html_text(cell) for cell in self._NAME_CELL_RE.findall(page)
        ]
        # Secondary name source: the visible text of a link to the toxin,
        # ignoring links whose text is just the ID itself.
        link_names: Dict[str, str] = {}
        for tid, label in self._LINK_TEXT_RE.findall(page):
            label = self._clean_html_text(label)
            if label and label != tid:
                link_names.setdefault(tid, label)

        results = []
        for idx, tid in enumerate(toxin_ids):
            name = cell_names[idx] if idx < len(cell_names) else None
            results.append({"id": tid, "name": name or link_names.get(tid)})

        return {
            "status": "success",
            "data": results,
            "metadata": {
                "query": query,
                "returned": len(results),
                "source": "T3DB",
                "note": "Use T3DB_get_toxin with the ID for detailed info",
            },
        }