# Source code for tooluniverse.sabiork_tool
"""
SABIO-RK Biochemical Reaction Kinetics database tool for ToolUniverse.
SABIO-RK (http://sabiork.h-its.org/) contains information about biochemical
reactions, their kinetic equations with parameters and experimental conditions.
API: https://sabiork.h-its.org/sabioRestWebServices/
No authentication required. Free public access.
"""
import xml.etree.ElementTree as ET
from typing import Any, Dict, List, Optional
import requests
from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL of the SABIO-RK REST web services; endpoint paths are appended to it.
SABIORK_BASE = "https://sabiork.h-its.org/sabioRestWebServices"

# Short labels for the SBO term identifiers read from the ``sboTerm``
# attribute of SBML local parameters.
_SBO_PARAM_TYPE = {
    "SBO:0000025": "kcat",
    "SBO:0000027": "Km",
    "SBO:0000261": "Ki",
    "SBO:0000302": "kcat/Km",
    "SBO:0000186": "Vmax",
    "SBO:0000320": "specific_activity",
    "SBO:0000022": "forward rate constant",
    "SBO:0000038": "reverse rate constant",
    "SBO:0000048": "forward unimolecular rate constant",
}

# Readable spellings for the raw unit identifiers read from the ``units``
# attribute of SBML local parameters.
_UNIT_MAP = {
    "M": "M",
    "swedgeone": "s^{-1}",
    "Mwedgeoneswedgeone": "M^{-1}*s^{-1}",
}
def _is_no_data_response(text: str) -> bool:
    """Return True when a response body carries no usable XML payload.

    A body counts as "no data" when it is empty or ``None``, contains the
    phrase "no data found" (case-insensitive), or does not start with ``<``
    once leading whitespace and an optional byte-order mark are skipped.

    Args:
        text: Decoded response body.

    Returns:
        True if the body should be treated as an empty result, else False.
    """
    if not text:
        return True
    if "no data found" in text.lower():
        return True
    # A decoded UTF-8 body may keep its BOM (U+FEFF), which str.strip() does
    # not remove; skip it so BOM-prefixed XML is still recognised as XML.
    leading = text.lstrip().lstrip("\ufeff").lstrip()
    return not leading.startswith("<")
def _parse_entry_ids(xml_text: str) -> List[str]:
    """Extract the entry IDs from an XML document.

    Collects the text of every ``SabioEntryID`` element below the root.
    Surrounding whitespace is removed and elements that are empty or
    whitespace-only are skipped, so every returned ID can be joined
    directly into a request URL.

    Args:
        xml_text: XML document as a string.

    Returns:
        Entry IDs in document order.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_text`` is not well-formed XML.
    """
    root = ET.fromstring(xml_text)
    cleaned = ((el.text or "").strip() for el in root.findall(".//SabioEntryID"))
    return [entry_id for entry_id in cleaned if entry_id]
def _extract_annotations(reaction_el, ns: dict) -> Dict[str, Any]:
    """Collect cross-reference identifiers from an element's RDF annotations.

    Every ``rdf:li`` descendant of ``reaction_el`` is inspected. The last
    path segment of its ``rdf:resource`` URI is stored under a key chosen by
    the first marker substring found in the URI.

    Args:
        reaction_el: Element whose descendants are searched for ``rdf:li``.
        ns: Prefix-to-URI mapping; only the ``"rdf"`` entry is read, falling
            back to the standard RDF syntax namespace when absent.

    Returns:
        Dict with any of the string-valued keys ``ec_number``,
        ``kegg_reaction``, ``sabiork_reaction_id``, ``tissue_bto`` and
        ``taxonomy_id`` (a later match overwrites an earlier one), plus
        ``pubmed_ids`` as a list of strings in document order.
    """
    # (marker substring, output key), checked in this order; first match wins.
    single_valued = (
        ("ec-code", "ec_number"),
        ("kegg.reaction", "kegg_reaction"),
        ("sabiork.reaction", "sabiork_reaction_id"),
        ("bto/", "tissue_bto"),
        ("taxonomy", "taxonomy_id"),
    )
    annotations: Dict[str, Any] = {}
    rdf_ns = ns.get("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    resource_attr = f"{{{rdf_ns}}}resource"
    for li in reaction_el.findall(f".//{{{rdf_ns}}}li"):
        resource = li.get(resource_attr, "")
        # Drop trailing slashes so ".../R00754/" yields "R00754", not "".
        identifier = resource.rstrip("/").rsplit("/", 1)[-1]
        if not identifier:
            continue
        for marker, key in single_valued:
            if marker in resource:
                annotations[key] = identifier
                break
        else:
            # PubMed references are multi-valued, so they accumulate.
            if "pubmed" in resource:
                annotations.setdefault("pubmed_ids", []).append(identifier)
    return annotations
def _parse_sbml_kinetics(xml_text: str) -> List[Dict[str, Any]]:
    """Turn an SBML document into one record per reaction.

    Each record holds ``substrates`` and ``products`` (species names, falling
    back to the species ID), ``parameters`` (one dict per ``localParameter``
    with ``type``, ``name``, ``value``, ``unit`` and ``sbo_term``), and
    whatever keys ``_extract_annotations`` finds for the reaction.

    Only elements in the SBML Level 3 Version 1 core namespace are matched.

    Args:
        xml_text: SBML document as a string.

    Returns:
        Records in document order of the ``reaction`` elements.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_text`` is not well-formed XML.
    """
    namespaces = {
        "sbml": "http://www.sbml.org/sbml/level3/version1/core",
        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    }
    document = ET.fromstring(xml_text)

    # Species ID -> display name; a species without a name maps to its own ID.
    names_by_id: Dict[str, str] = {}
    for species in document.findall(".//sbml:species", namespaces):
        species_id = species.get("id", "")
        names_by_id[species_id] = species.get("name", species_id)

    def participant_names(reaction, list_tag: str) -> List[str]:
        """Resolve the species referenced under ``list_tag`` to names."""
        path = f".//sbml:{list_tag}/sbml:speciesReference"
        referenced = (
            ref.get("species", "") for ref in reaction.findall(path, namespaces)
        )
        # Unknown species IDs are passed through unchanged.
        return [names_by_id.get(ref_id, ref_id) for ref_id in referenced]

    def describe_parameter(local_param) -> Dict[str, Any]:
        """Build the output dict for one ``localParameter`` element."""
        sbo_term = local_param.get("sboTerm", "")
        raw_value = local_param.get("value", "")
        raw_unit = local_param.get("units", "")
        try:
            parsed_value = float(raw_value)
        except (ValueError, TypeError):
            # Non-numeric (or missing) values are kept as the raw string.
            parsed_value = raw_value
        return {
            "type": _SBO_PARAM_TYPE.get(sbo_term, sbo_term),
            "name": local_param.get("name", local_param.get("id", "")),
            "value": parsed_value,
            "unit": _UNIT_MAP.get(raw_unit, raw_unit),
            "sbo_term": sbo_term,
        }

    kinetic_laws: List[Dict[str, Any]] = []
    for reaction in document.findall(".//sbml:reaction", namespaces):
        law: Dict[str, Any] = {
            "substrates": participant_names(reaction, "listOfReactants"),
            "products": participant_names(reaction, "listOfProducts"),
            "parameters": [
                describe_parameter(local_param)
                for local_param in reaction.findall(
                    ".//sbml:localParameter", namespaces
                )
            ],
        }
        law.update(_extract_annotations(reaction, namespaces))
        kinetic_laws.append(law)
    return kinetic_laws
# [docs]
@register_tool("SABIORKTool")
class SABIORKTool(BaseTool):
    """
    Tool for querying the SABIO-RK biochemical reaction kinetics database.

    Searches kinetic law entries by enzyme, substrate, product, organism or
    parameter type, then returns the kinetic parameters and cross-reference
    identifiers parsed from the SBML export of the matching entries.
    No authentication required.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """Initialise the tool.

        Args:
            tool_config: Tool configuration; the optional ``timeout`` key
                sets the per-request timeout in seconds (default 30).
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Dispatch to the requested operation and report failures as dicts.

        Args:
            arguments: Call arguments. ``operation`` selects the handler;
                when it is missing or empty the value of
                ``self.get_schema_const_operation()`` is used instead.

        Returns:
            The handler's result, or ``{"status": "error", "error": ...}``
            for an unknown operation or any exception raised by the handler.
        """
        operation = arguments.get("operation", "") or self.get_schema_const_operation()
        dispatch = {
            "search_reactions": self._search_reactions,
        }
        handler = dispatch.get(operation)
        if handler is None:
            return {
                "status": "error",
                "error": f"Unknown operation: {operation}. Supported: {', '.join(dispatch)}",
            }
        try:
            return handler(arguments)
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"SABIO-RK API timed out after {self.timeout}s",
            }
        except requests.exceptions.ConnectionError:
            return {
                "status": "error",
                "error": "Failed to connect to SABIO-RK API",
            }
        except ET.ParseError as e:
            return {
                "status": "error",
                "error": f"Failed to parse SABIO-RK XML response: {e}",
            }
        except Exception as e:
            # Last-resort boundary: callers always receive a result dict.
            return {
                "status": "error",
                "error": f"SABIO-RK query failed: {e}",
            }

    def _build_query(self, arguments: Dict[str, Any]) -> str:
        """Build the SABIO-RK search query string from the arguments.

        Args:
            arguments: Call arguments; the keys ``ec_number``,
                ``enzyme_name``, ``substrate``, ``organism``, ``product`` and
                ``parameter_type`` are read, and falsy values are ignored.

        Returns:
            The field clauses joined with ``" AND "``, or ``""`` when no
            search argument was supplied.
        """
        # (argument key, clause template), emitted in this order.
        clause_templates = (
            ("ec_number", "ecnumber:{}"),
            ("enzyme_name", 'EnzymeName:"{}"'),
            ("substrate", 'Substrate:"{}"'),
            ("organism", 'Organism:"{}"'),
            ("product", 'Product:"{}"'),
            ("parameter_type", 'parametertype:"{}"'),
        )
        parts = []
        for key, template in clause_templates:
            value = arguments.get(key, "")
            if value:
                parts.append(template.format(value))
        return " AND ".join(parts)

    @staticmethod
    def _empty_result(query: str, url: str) -> Dict[str, Any]:
        """Return the success payload used when a search matches nothing."""
        return {
            "status": "success",
            "data": {
                "query": query,
                "kinetic_laws": [],
                "total_count": 0,
                "returned_count": 0,
            },
            "metadata": {"source": "SABIO-RK", "url": url},
        }

    def _search_reactions(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search SABIO-RK for kinetic laws matching the query.

        Performs two requests: one for the matching entry IDs, then one for
        the SBML of the first ``limit`` IDs.

        Args:
            arguments: Call arguments; see ``_build_query`` for the search
                keys. ``limit`` (default 20, must be >= 1) caps how many
                entries are fetched.

        Returns:
            A result dict with ``status`` ``"success"`` (``data`` and
            ``metadata`` populated) or ``"error"`` (``error`` message).

        Raises:
            requests.exceptions.RequestException: On network failure.
            xml.etree.ElementTree.ParseError: If a response body that looks
                like XML is not well-formed.
        """
        query = self._build_query(arguments)
        if not query:
            return {
                "status": "error",
                "error": (
                    "At least one search parameter required: ec_number, "
                    "enzyme_name, substrate, organism, product, or parameter_type"
                ),
            }

        raw_limit = arguments.get("limit")
        try:
            limit = 20 if raw_limit is None else int(raw_limit)
        except (TypeError, ValueError):
            limit = 0
        if limit < 1:
            # A zero or negative limit would fetch nothing or silently slice
            # entries off the end of the ID list.
            return {
                "status": "error",
                "error": f"Invalid limit: {raw_limit!r}. Expected a positive integer",
            }

        # Step 1: Get entry IDs. The query goes through ``params`` so that
        # characters such as '&', '#' or '+' in names are percent-encoded
        # instead of corrupting the URL.
        search_endpoint = f"{SABIORK_BASE}/searchKineticLaws/entryIDs"
        # Readable form of the request, reported in the result metadata.
        url = f"{search_endpoint}?q={query}"
        resp = requests.get(
            search_endpoint, params={"q": query}, timeout=self.timeout
        )
        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"SABIO-RK search returned HTTP {resp.status_code}",
            }
        if _is_no_data_response(resp.text):
            return self._empty_result(query, url)

        entry_ids = _parse_entry_ids(resp.text)
        if not entry_ids:
            # Well-formed XML without any IDs: nothing to fetch.
            return self._empty_result(query, url)
        total_count = len(entry_ids)

        # Step 2: Fetch SBML for limited set of entries
        fetch_ids = entry_ids[:limit]
        ids_str = ",".join(fetch_ids)
        sbml_url = f"{SABIORK_BASE}/kineticLaws?kinlawids={ids_str}"
        resp2 = requests.get(sbml_url, timeout=self.timeout)
        if resp2.status_code != 200:
            return {
                "status": "error",
                "error": f"SABIO-RK SBML fetch returned HTTP {resp2.status_code}",
            }
        records = _parse_sbml_kinetics(resp2.text)

        # Attach entry IDs to records by position.
        # NOTE(review): assumes reactions come back in the order the IDs
        # were requested -- confirm against the service.
        for rec, entry_id in zip(records, fetch_ids):
            rec["entry_id"] = entry_id

        return {
            "status": "success",
            "data": {
                "query": query,
                "kinetic_laws": records,
                "total_count": total_count,
                "returned_count": len(records),
            },
            "metadata": {
                "source": "SABIO-RK",
                "url": "http://sabiork.h-its.org/",
                "note": f"Showing {len(records)} of {total_count} kinetic laws",
            },
        }