Source code for tooluniverse.bridgedb_tool

"""
BridgeDb Tool - Biological Identifier Mapping Service

BridgeDb is a framework for mapping identifiers between biological databases.
It supports genes, proteins, metabolites, and other biological entities,
providing cross-references between HMDB, ChEBI, KEGG, PubChem, Ensembl,
UniProt, HGNC, and many more databases.

API base: https://webservice.bridgedb.org
No authentication required.

Reference: van Iersel et al., BMC Bioinformatics 2010, 11:5
"""

import requests
from typing import Dict, Any, Optional, List
from .base_tool import BaseTool
from .tool_registry import register_tool


# Root URL of the BridgeDb REST web service; endpoint paths are appended to it.
BRIDGEDB_BASE_URL = "https://webservice.bridgedb.org"

# Database display name -> BridgeDb system code for the databases this module
# can resolve by name. Anything not listed here is passed to the API unchanged.
SYSTEM_CODES = {
    "HMDB": "Ch",
    "ChEBI": "Ce",
    "KEGG Compound": "Ck",
    "KEGG Drug": "Kd",
    "PubChem-compound": "Cpc",
    "Wikidata": "Wd",
    "CAS": "Ca",
    "Chemspider": "Cs",
    "Ensembl": "En",
    "HGNC": "H",
    "UniProt": "S",
    "NCBI Gene": "L",
    "RefSeq": "Q",
    "KEGG Genes": "Kg",
    "PDB": "Pd",
    "GeneOntology": "T",
    "InChIKey": "Ik",
    "SwissLipids": "Sl",
    "KNApSAcK": "Kn",
    "Rhea": "Rh",
    "MetaCyc": "Mc",
}

# Inverse of SYSTEM_CODES: system code -> database display name.
CODE_TO_NAME = {
    system_code: database_name
    for database_name, system_code in SYSTEM_CODES.items()
}


@register_tool("BridgeDbTool")
class BridgeDbTool(BaseTool):
    """
    Tool for mapping biological identifiers across databases using BridgeDb.

    BridgeDb provides cross-reference lookups for genes, proteins, metabolites,
    and other biological entities across 45+ databases including HMDB, ChEBI,
    KEGG, PubChem, Ensembl, UniProt, and HGNC.

    Supported operations:
    - xrefs: Get all cross-references for an identifier
    - search: Search for identifiers by name
    - attributes: Get properties/attributes of an identifier

    The operation is chosen with the "operation" key of the arguments passed
    to ``run``. Results are dicts carrying a "status" of "success" or "error".
    """
def __init__(self, tool_config: Dict[str, Any]):
    """Set up the tool from its configuration.

    Args:
        tool_config: Tool configuration mapping. Its optional "parameter"
            entry is kept on the instance, together with that entry's
            "required" list (empty when absent).
    """
    super().__init__(tool_config)

    # Parameter schema from the config and the names it marks as required.
    schema = tool_config.get("parameter", {})
    self.parameter = schema
    self.required = schema.get("required", [])

    # A single HTTP session is used for every request made by this instance;
    # `timeout` is the value handed to each session.get call.
    self.timeout = 30
    self.session = requests.Session()
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Execute the BridgeDb API tool with given arguments.

    Args:
        arguments: Call arguments. "operation" selects the handler
            ("xrefs", "search" or "attributes"); the whole mapping is
            forwarded to that handler.

    Returns:
        The handler's result dict, or
        ``{"status": "error", "error": <message>}`` when the operation is
        missing or unknown, or when the handler raises.
    """
    operation = arguments.get("operation")
    if not operation:
        return {"status": "error", "error": "Missing required parameter: operation"}

    # Operation name -> name of the method that implements it.
    handler_names = {
        "xrefs": "_get_xrefs",
        "search": "_search",
        "attributes": "_get_attributes",
    }

    # Only a string can name an operation. Checking the type first keeps an
    # unhashable value (e.g. a list) from raising TypeError in the dict
    # lookup, so the caller still receives a structured error.
    handler_name = (
        handler_names.get(operation) if isinstance(operation, str) else None
    )
    if handler_name is None:
        return {
            "status": "error",
            "error": "Unknown operation: {}. Available: {}".format(
                operation, list(handler_names)
            ),
        }

    try:
        return getattr(self, handler_name)(arguments)
    except requests.exceptions.Timeout:
        return {"status": "error", "error": "BridgeDb API request timed out"}
    except requests.exceptions.ConnectionError:
        return {"status": "error", "error": "Failed to connect to BridgeDb API"}
    except Exception as e:
        # Boundary of the tool: any other failure is reported, not raised.
        return {
            "status": "error",
            "error": "BridgeDb operation failed: {}".format(str(e)),
        }
def _resolve_system_code(self, source: str) -> str:
    """Resolve a database name or system code to a BridgeDb system code.

    Args:
        source: Either a system code listed in CODE_TO_NAME or a database
            name listed in SYSTEM_CODES (names are compared ignoring case).

    Returns:
        The matching system code, or ``source`` unchanged when nothing
        matches, leaving it to the API to reject an invalid value.
    """
    # A known system code (exact, case-sensitive match) passes straight through.
    if source in CODE_TO_NAME:
        return source

    wanted = source.lower()
    candidates = (
        code for name, code in SYSTEM_CODES.items() if name.lower() == wanted
    )
    # First name match wins; unknown input is handed back untouched.
    return next(candidates, source)
[docs] def _parse_tsv_xrefs(self, text: str) -> List[Dict[str, str]]: """Parse tab-separated cross-reference data into structured list.""" results = [] for line in text.strip().split("\n"): if not line.strip(): continue parts = line.split("\t") if len(parts) >= 2: identifier = parts[0] source_name = parts[1] # Look up the system code from the source name code = SYSTEM_CODES.get(source_name, "") results.append( { "identifier": identifier, "database": source_name, "system_code": code, } ) return results
[docs] def _parse_tsv_attributes(self, text: str) -> Dict[str, Any]: """Parse tab-separated attribute data into structured dict.""" attributes = {} synonyms = [] for line in text.strip().split("\n"): if not line.strip(): continue parts = line.split("\t") if len(parts) >= 2: value = parts[0] key = parts[1] if key == "Synonym": synonyms.append(value) else: attributes[key] = value if synonyms: attributes["Synonyms"] = synonyms return attributes
[docs] def _get_xrefs(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get cross-references for an identifier.""" identifier = arguments.get("identifier") source = arguments.get("source") organism = arguments.get("organism", "Human") target_source = arguments.get("target_source") if not identifier or not source: return { "status": "error", "error": "Both 'identifier' and 'source' parameters are required", } system_code = self._resolve_system_code(source) url = "{}/{}/xrefs/{}/{}".format( BRIDGEDB_BASE_URL, organism, system_code, identifier ) params = {} if target_source: params["dataSource"] = self._resolve_system_code(target_source) response = self.session.get(url, params=params, timeout=self.timeout) if response.status_code not in (200, 204): return { "status": "error", "error": "BridgeDb returned status {}".format(response.status_code), } if response.status_code == 204 or not response.text.strip(): return { "status": "success", "data": { "query_identifier": identifier, "query_source": source, "organism": organism, "cross_references": [], "count": 0, }, } xrefs = self._parse_tsv_xrefs(response.text) return { "status": "success", "data": { "query_identifier": identifier, "query_source": source, "organism": organism, "cross_references": xrefs, "count": len(xrefs), }, }
[docs] def _get_attributes(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get attributes/properties for an identifier.""" identifier = arguments.get("identifier") source = arguments.get("source") organism = arguments.get("organism", "Human") if not identifier or not source: return { "status": "error", "error": "Both 'identifier' and 'source' parameters are required", } system_code = self._resolve_system_code(source) url = "{}/{}/attributes/{}/{}".format( BRIDGEDB_BASE_URL, organism, system_code, identifier ) response = self.session.get(url, timeout=self.timeout) if response.status_code not in (200, 204): return { "status": "error", "error": "BridgeDb returned status {}".format(response.status_code), } if response.status_code == 204 or not response.text.strip(): return { "status": "success", "data": { "query_identifier": identifier, "query_source": source, "organism": organism, "attributes": {}, }, } attributes = self._parse_tsv_attributes(response.text) return { "status": "success", "data": { "query_identifier": identifier, "query_source": source, "organism": organism, "attributes": attributes, }, }