# Source code for tooluniverse.bridgedb_tool
"""
BridgeDb Tool - Biological Identifier Mapping Service
BridgeDb is a framework for mapping identifiers between biological databases.
It supports genes, proteins, metabolites, and other biological entities,
providing cross-references between HMDB, ChEBI, KEGG, PubChem, Ensembl,
UniProt, HGNC, and many more databases.
API base: https://webservice.bridgedb.org
No authentication required.
Reference: van Iersel et al., BMC Bioinformatics 2010, 11:5
"""
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL that every BridgeDb REST request in this module is built on.
BRIDGEDB_BASE_URL = "https://webservice.bridgedb.org"

# Database name -> BridgeDb system code, for commonly used databases.
# Callers may pass either side of this mapping as a "source".
SYSTEM_CODES = {
    "HMDB": "Ch",
    "ChEBI": "Ce",
    "KEGG Compound": "Ck",
    "KEGG Drug": "Kd",
    "PubChem-compound": "Cpc",
    "Wikidata": "Wd",
    "CAS": "Ca",
    "Chemspider": "Cs",
    "Ensembl": "En",
    "HGNC": "H",
    "UniProt": "S",
    "NCBI Gene": "L",
    "RefSeq": "Q",
    "KEGG Genes": "Kg",
    "PDB": "Pd",
    "GeneOntology": "T",
    "InChIKey": "Ik",
    "SwissLipids": "Sl",
    "KNApSAcK": "Kn",
    "Rhea": "Rh",
    "MetaCyc": "Mc",
}

# Inverse view of SYSTEM_CODES: system code -> database name.
CODE_TO_NAME = {
    system_code: database_name
    for database_name, system_code in SYSTEM_CODES.items()
}
# [docs]
@register_tool("BridgeDbTool")
class BridgeDbTool(BaseTool):
    """
    Tool for mapping biological identifiers across databases using BridgeDb.

    BridgeDb provides cross-reference lookups for genes, proteins, metabolites,
    and other biological entities across 45+ databases including HMDB, ChEBI,
    KEGG, PubChem, Ensembl, UniProt, and HGNC.

    Supported operations:
    - xrefs: Get all cross-references for an identifier
    - search: Search for identifiers by name
    - attributes: Get properties/attributes of an identifier

    Every operation returns a dict with a ``status`` key. On success the
    payload is under ``data``; on failure a human-readable message is under
    ``error``.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the tool configuration and create a reusable HTTP session.

        Args:
            tool_config: Tool configuration. Its optional ``parameter`` entry
                and the ``required`` list inside it are cached on the instance.
        """
        super().__init__(tool_config)
        # ``or {}`` / ``or []`` tolerate keys that are present but set to None.
        self.parameter = tool_config.get("parameter") or {}
        self.required = self.parameter.get("required") or []
        self.session = requests.Session()
        self.timeout = 30  # passed as ``timeout=`` to every request

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the BridgeDb API tool with given arguments.

        Args:
            arguments: Must contain ``operation`` (``"xrefs"``, ``"search"``
                or ``"attributes"``) plus the parameters of that operation.

        Returns:
            The handler's result dict, or an error dict when the operation is
            missing/unknown or the request fails. Exceptions raised by the
            handler are converted into error dicts rather than propagated.
        """
        arguments = arguments or {}
        operation = arguments.get("operation")
        if not operation:
            return self._error("Missing required parameter: operation")

        operation_handlers = {
            "xrefs": self._get_xrefs,
            "search": self._search,
            "attributes": self._get_attributes,
        }
        # Only strings can name an operation; the type check also keeps an
        # unhashable value (list/dict) from raising inside the dict lookup.
        handler = (
            operation_handlers.get(operation)
            if isinstance(operation, str)
            else None
        )
        if handler is None:
            return self._error(
                f"Unknown operation: {operation}. "
                f"Available: {list(operation_handlers)}"
            )

        try:
            return handler(arguments)
        except requests.exceptions.Timeout:
            return self._error("BridgeDb API request timed out")
        except requests.exceptions.ConnectionError:
            return self._error("Failed to connect to BridgeDb API")
        except Exception as e:
            # Tool boundary: report the failure instead of crashing the caller.
            return self._error(f"BridgeDb operation failed: {e}")

    @staticmethod
    def _error(message: str) -> Dict[str, Any]:
        """Build the error-result dict used by every operation."""
        return {"status": "error", "error": message}

    @staticmethod
    def _build_url(*segments: Any) -> str:
        """Join path segments onto the BridgeDb base URL, percent-encoding each.

        Segments come from caller-supplied arguments, so characters such as
        ``/``, ``?`` and ``#`` must be escaped or they would change which
        endpoint is requested. ``:`` is valid inside a path segment and is
        left literal so prefixed identifiers are sent unchanged.
        """
        encoded = "/".join(quote(str(segment), safe=":") for segment in segments)
        return f"{BRIDGEDB_BASE_URL}/{encoded}"

    def _fetch_text(
        self, url: str, params: Optional[Dict[str, Any]] = None
    ) -> Tuple[str, Optional[Dict[str, Any]]]:
        """GET ``url`` and return ``(body_text, error)``.

        Exactly one element is meaningful: ``error`` is ``None`` on success,
        otherwise it is a ready-to-return error dict and the text is ``""``.
        A 204 response is treated as a successful empty body.
        """
        response = self.session.get(url, params=params, timeout=self.timeout)
        if response.status_code not in (200, 204):
            return "", self._error(
                f"BridgeDb returned status {response.status_code}"
            )
        if response.status_code == 204:
            return "", None
        return response.text, None

    def _resolve_system_code(self, source: str) -> str:
        """Resolve a database name or system code to a BridgeDb system code.

        Args:
            source: A system code listed in ``CODE_TO_NAME`` (matched exactly)
                or a database name listed in ``SYSTEM_CODES`` (matched
                case-insensitively). Surrounding whitespace is ignored.

        Returns:
            The matching system code, or the cleaned input unchanged when it
            is not recognized, so the API decides whether it is valid.
        """
        candidate = str(source).strip()
        if candidate in CODE_TO_NAME:
            return candidate
        folded = candidate.lower()
        for name, code in SYSTEM_CODES.items():
            if name.lower() == folded:
                return code
        return candidate

    def _parse_tsv_xrefs(self, text: str) -> List[Dict[str, str]]:
        """Parse tab-separated cross-reference data into a structured list.

        Each usable line has at least two tab-separated fields: identifier
        first, database name second. Lines with fewer fields are skipped.

        Returns:
            One dict per line with ``identifier``, ``database`` and
            ``system_code``; ``system_code`` is ``""`` when the database name
            is not a key of ``SYSTEM_CODES``.
        """
        results = []
        for line in text.strip().split("\n"):
            if not line.strip():
                continue
            fields = line.split("\t")
            if len(fields) < 2:
                continue
            # Strip so a CRLF line ending does not leave "\r" on the database
            # name and defeat the SYSTEM_CODES lookup below.
            identifier = fields[0].strip()
            source_name = fields[1].strip()
            results.append(
                {
                    "identifier": identifier,
                    "database": source_name,
                    "system_code": SYSTEM_CODES.get(source_name, ""),
                }
            )
        return results

    def _parse_tsv_attributes(self, text: str) -> Dict[str, Any]:
        """Parse tab-separated attribute data into a structured dict.

        Each usable line has the value first and the attribute name second.
        All ``Synonym`` values are collected into a list stored under
        ``"Synonyms"``; for any other repeated name the last value wins.
        """
        attributes: Dict[str, Any] = {}
        synonyms: List[str] = []
        for line in text.strip().split("\n"):
            if not line.strip():
                continue
            fields = line.split("\t")
            if len(fields) < 2:
                continue
            # Strip so a CRLF line ending does not corrupt the attribute name.
            value = fields[0].strip()
            key = fields[1].strip()
            if key == "Synonym":
                synonyms.append(value)
            else:
                attributes[key] = value
        if synonyms:
            attributes["Synonyms"] = synonyms
        return attributes

    def _get_xrefs(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get cross-references for an identifier.

        Args:
            arguments: Requires ``identifier`` and ``source``. Optional
                ``organism`` (defaults to ``"Human"``) and ``target_source``,
                which is sent as the ``dataSource`` query parameter.

        Returns:
            A success dict whose ``data`` holds the query echo, the
            ``cross_references`` list and its ``count``, or an error dict.
        """
        identifier = arguments.get("identifier")
        source = arguments.get("source")
        # ``or`` also covers an explicit None/"" passed by the caller.
        organism = arguments.get("organism") or "Human"
        target_source = arguments.get("target_source")
        if not identifier or not source:
            return self._error(
                "Both 'identifier' and 'source' parameters are required"
            )

        url = self._build_url(
            organism, "xrefs", self._resolve_system_code(source), identifier
        )
        params = {}
        if target_source:
            params["dataSource"] = self._resolve_system_code(target_source)

        text, error = self._fetch_text(url, params)
        if error is not None:
            return error

        # An empty body parses to an empty list, so no special case is needed.
        xrefs = self._parse_tsv_xrefs(text)
        return {
            "status": "success",
            "data": {
                "query_identifier": identifier,
                "query_source": source,
                "organism": organism,
                "cross_references": xrefs,
                "count": len(xrefs),
            },
        }

    def _search(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Search for identifiers by name.

        Args:
            arguments: Requires ``query``. Optional ``organism`` (defaults to
                ``"Human"``).

        Returns:
            A success dict whose ``data`` holds the query echo, the
            ``results`` list and its ``count``, or an error dict.
        """
        query = arguments.get("query")
        organism = arguments.get("organism") or "Human"
        if not query:
            return self._error("query parameter is required")

        text, error = self._fetch_text(
            self._build_url(organism, "search", query)
        )
        if error is not None:
            return error

        results = self._parse_tsv_xrefs(text)
        return {
            "status": "success",
            "data": {
                "query": query,
                "organism": organism,
                "results": results,
                "count": len(results),
            },
        }

    def _get_attributes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get attributes/properties for an identifier.

        Args:
            arguments: Requires ``identifier`` and ``source``. Optional
                ``organism`` (defaults to ``"Human"``).

        Returns:
            A success dict whose ``data`` holds the query echo and the parsed
            ``attributes`` dict (empty when the API returns no content), or
            an error dict.
        """
        identifier = arguments.get("identifier")
        source = arguments.get("source")
        organism = arguments.get("organism") or "Human"
        if not identifier or not source:
            return self._error(
                "Both 'identifier' and 'source' parameters are required"
            )

        url = self._build_url(
            organism,
            "attributes",
            self._resolve_system_code(source),
            identifier,
        )
        text, error = self._fetch_text(url)
        if error is not None:
            return error

        return {
            "status": "success",
            "data": {
                "query_identifier": identifier,
                "query_source": source,
                "organism": organism,
                "attributes": self._parse_tsv_attributes(text),
            },
        }