Source code for tooluniverse.omnipath_tool

# omnipath_tool.py
"""
OmniPath API tool for ToolUniverse.

OmniPath is the largest integrated database of intra- and intercellular
signaling knowledge. It combines data from 100+ resources including
CellPhoneDB, CellChatDB, CellTalkDB, SIGNOR, KEGG, Reactome, and more.

Provides access to:
- Ligand-receptor interactions (cell-cell communication)
- Intercellular role annotations (ligand/receptor classification)
- Signaling pathway interactions (directed, signed PPI)
- Protein complex compositions
- Cell communication annotations (from CellPhoneDB/CellChatDB/etc.)
- Enzyme-substrate (PTM) relationships

API: https://omnipathdb.org/
No authentication required. JSON format supported.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

OMNIPATH_BASE_URL = "https://omnipathdb.org"


[docs] @register_tool("OmniPathTool") class OmniPathTool(BaseTool): """ Tool for querying OmniPath intercellular and intracellular signaling data. OmniPath integrates 100+ databases covering: - Ligand-receptor interactions (14,000+ pairs) - Intercellular communication roles - Signaling pathway interactions - Protein complexes (22,000+) - Cell communication annotations - Enzyme-substrate relationships No authentication required. """
[docs] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) self.timeout = tool_config.get("timeout", 30) fields = tool_config.get("fields", {}) self.endpoint = fields.get("endpoint", "ligand_receptor")
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Execute the OmniPath API call.""" try: return self._query(arguments) except requests.exceptions.Timeout: return {"error": f"OmniPath API timed out after {self.timeout}s"} except requests.exceptions.ConnectionError: return {"error": "Failed to connect to OmniPath API at omnipathdb.org"} except requests.exceptions.HTTPError as e: return {"error": f"OmniPath API HTTP error: {e.response.status_code}"} except Exception as e: return {"error": f"Unexpected error querying OmniPath: {str(e)}"}
[docs] def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Route to appropriate OmniPath endpoint.""" if self.endpoint == "ligand_receptor": return self._get_ligand_receptor(arguments) elif self.endpoint == "intercell": return self._get_intercell(arguments) elif self.endpoint == "signaling": return self._get_signaling(arguments) elif self.endpoint == "complexes": return self._get_complexes(arguments) elif self.endpoint == "annotations": return self._get_annotations(arguments) elif self.endpoint == "enz_sub": return self._get_enzyme_substrate(arguments) elif self.endpoint == "tf_target": return self._get_tf_target_interactions(arguments) elif self.endpoint == "dorothea": return self._get_dorothea_regulon(arguments) else: return {"error": f"Unknown endpoint: {self.endpoint}"}
[docs] def _make_request(self, path: str, params: Dict[str, Any]) -> Any: """Make an HTTP request to OmniPath API.""" url = f"{OMNIPATH_BASE_URL}/{path}" # Always request JSON format params["format"] = "json" response = requests.get(url, params=params, timeout=self.timeout) # Check for OmniPath text error responses if response.headers.get("content-type", "").startswith("text/plain"): text = response.text.strip() if ( "not entirely good" in text.lower() or "unknown argument" in text.lower() ): raise ValueError(f"OmniPath API error: {text[:200]}") response.raise_for_status() return response.json()
[docs] def _get_ligand_receptor(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get ligand-receptor interactions from the ligrecextra dataset.""" params = { "genesymbols": "yes", "fields": "sources,references,curation_effort,type", "datasets": "ligrecextra", } # Add protein filters if arguments.get("partners"): params["partners"] = arguments["partners"] if arguments.get("sources"): params["sources"] = arguments["sources"] if arguments.get("targets"): params["targets"] = arguments["targets"] if arguments.get("databases"): params["databases"] = arguments["databases"] if arguments.get("organisms"): params["organisms"] = str(arguments["organisms"]) if arguments.get("limit"): params["limit"] = str(arguments["limit"]) # Must have at least one protein filter if not any(arguments.get(k) for k in ["partners", "sources", "targets"]): return { "error": "At least one of 'partners', 'sources', or 'targets' is required to query ligand-receptor interactions." } data = self._make_request("interactions/", params) if not isinstance(data, list): return {"error": f"Unexpected response format from OmniPath: {type(data)}"} interactions = [] for item in data: interactions.append( { "source_uniprot": item.get("source"), "target_uniprot": item.get("target"), "source_genesymbol": item.get("source_genesymbol"), "target_genesymbol": item.get("target_genesymbol"), "is_directed": bool(item.get("is_directed", 0)), "is_stimulation": bool(item.get("is_stimulation", 0)), "is_inhibition": bool(item.get("is_inhibition", 0)), "sources": item.get("sources", []), "curation_effort": item.get("curation_effort"), "type": item.get("type"), } ) # Sort by curation effort (most curated first) interactions.sort(key=lambda x: x.get("curation_effort") or 0, reverse=True) return { "data": interactions, "metadata": { "source": "OmniPath (omnipathdb.org)", "dataset": "ligrecextra", "total_interactions": len(interactions), "description": "Ligand-receptor interactions from CellPhoneDB, CellChatDB, CellTalkDB, and 20+ databases", }, }
[docs] def _get_intercell(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get intercellular communication role annotations.""" params = {} if arguments.get("proteins"): params["proteins"] = arguments["proteins"] if arguments.get("categories"): params["categories"] = arguments["categories"] if arguments.get("scope"): params["scope"] = arguments["scope"] if arguments.get("transmitter") is not None: params["transmitter"] = "yes" if arguments["transmitter"] else "no" if arguments.get("receiver") is not None: params["receiver"] = "yes" if arguments["receiver"] else "no" if arguments.get("secreted") is not None: params["secreted"] = "yes" if arguments["secreted"] else "no" if arguments.get("limit"): params["limit"] = str(arguments["limit"]) data = self._make_request("intercell/", params) if not isinstance(data, list): return {"error": f"Unexpected response format from OmniPath: {type(data)}"} results = [] for item in data: results.append( { "uniprot": item.get("uniprot"), "genesymbol": item.get("genesymbol"), "category": item.get("category"), "parent": item.get("parent"), "database": item.get("database"), "scope": item.get("scope"), "aspect": item.get("aspect"), "consensus_score": item.get("consensus_score"), "transmitter": item.get("transmitter", False), "receiver": item.get("receiver", False), "secreted": item.get("secreted", False), "plasma_membrane_transmembrane": item.get( "plasma_membrane_transmembrane", False ), } ) return { "data": results, "metadata": { "source": "OmniPath Intercell (omnipathdb.org)", "total_annotations": len(results), "description": "Intercellular communication role annotations from 40+ databases", }, }
[docs] def _get_signaling(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get intracellular signaling pathway interactions.""" datasets = arguments.get("datasets") or "omnipath" params = { "genesymbols": "yes", "fields": "sources,references,curation_effort,type", "datasets": datasets, } if arguments.get("partners"): params["partners"] = arguments["partners"] if arguments.get("sources"): params["sources"] = arguments["sources"] if arguments.get("targets"): params["targets"] = arguments["targets"] if arguments.get("directed") is not None: params["directed"] = "yes" if arguments["directed"] else "no" if arguments.get("signed") is not None: params["signed"] = "yes" if arguments["signed"] else "no" if arguments.get("organisms"): params["organisms"] = str(arguments["organisms"]) if arguments.get("limit"): params["limit"] = str(arguments["limit"]) if not any(arguments.get(k) for k in ["partners", "sources", "targets"]): return { "error": "At least one of 'partners', 'sources', or 'targets' is required to query signaling interactions." } data = self._make_request("interactions/", params) if not isinstance(data, list): return {"error": f"Unexpected response format from OmniPath: {type(data)}"} interactions = [] for item in data: interactions.append( { "source_uniprot": item.get("source"), "target_uniprot": item.get("target"), "source_genesymbol": item.get("source_genesymbol"), "target_genesymbol": item.get("target_genesymbol"), "is_directed": bool(item.get("is_directed", 0)), "is_stimulation": bool(item.get("is_stimulation", 0)), "is_inhibition": bool(item.get("is_inhibition", 0)), "consensus_direction": bool(item.get("consensus_direction", 0)), "consensus_stimulation": bool(item.get("consensus_stimulation", 0)), "consensus_inhibition": bool(item.get("consensus_inhibition", 0)), "sources": item.get("sources", []), "curation_effort": item.get("curation_effort"), "type": item.get("type"), } ) interactions.sort(key=lambda x: x.get("curation_effort") or 0, reverse=True) return { "data": interactions, "metadata": { "source": "OmniPath (omnipathdb.org)", "datasets": datasets, "total_interactions": len(interactions), }, }
[docs] def _get_complexes(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get protein complex compositions.""" proteins = arguments.get("proteins", "") if not proteins: return { "error": "proteins parameter is required (UniProt accession(s), e.g., P01137)" } params = {"proteins": proteins} if arguments.get("databases"): params["databases"] = arguments["databases"] data = self._make_request("complexes/", params) if not isinstance(data, list): return {"error": f"Unexpected response format from OmniPath: {type(data)}"} complexes = [] for item in data: complexes.append( { "name": item.get("name"), "components": item.get("components"), "components_genesymbols": item.get("components_genesymbols"), "stoichiometry": item.get("stoichiometry"), "sources": item.get("sources", []), "references": item.get("references"), "identifiers": item.get("identifiers"), } ) return { "data": complexes, "metadata": { "source": "OmniPath Complexes (omnipathdb.org)", "query_proteins": proteins, "total_complexes": len(complexes), "description": "Protein complexes from CORUM, CellPhoneDB, ComplexPortal, and other databases", }, }
[docs] def _get_annotations(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get cell communication annotations from CellPhoneDB, CellChatDB, etc.""" proteins = arguments.get("proteins", "") if not proteins: return { "error": "proteins parameter is required (UniProt accession(s) or gene symbol(s))" } databases = arguments.get("databases") or "CellPhoneDB,CellChatDB" genesymbols = arguments.get("genesymbols") if genesymbols is None: genesymbols = True params = { "proteins": proteins, "databases": databases, "genesymbols": "yes" if genesymbols else "no", } data = self._make_request("annotations/", params) if not isinstance(data, list): return {"error": f"Unexpected response format from OmniPath: {type(data)}"} annotations = [] for item in data: annotations.append( { "uniprot": item.get("uniprot"), "genesymbol": item.get("genesymbol"), "entity_type": item.get("entity_type"), "source": item.get("source"), "label": item.get("label"), "value": item.get("value"), "record_id": item.get("record_id"), } ) return { "data": annotations, "metadata": { "source": "OmniPath Annotations (omnipathdb.org)", "databases_queried": databases, "total_annotations": len(annotations), }, }
[docs] def _get_enzyme_substrate(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get enzyme-substrate (PTM) interactions.""" params = { "genesymbols": "yes", "fields": "sources,references", } if arguments.get("enzymes"): params["enzymes"] = arguments["enzymes"] if arguments.get("substrates"): params["substrates"] = arguments["substrates"] if arguments.get("types"): params["types"] = arguments["types"] if arguments.get("organisms"): params["organisms"] = str(arguments["organisms"]) if arguments.get("limit"): params["limit"] = str(arguments["limit"]) if not any(arguments.get(k) for k in ["enzymes", "substrates"]): return {"error": "At least one of 'enzymes' or 'substrates' is required."} data = self._make_request("enz_sub/", params) if not isinstance(data, list): return {"error": f"Unexpected response format from OmniPath: {type(data)}"} results = [] for item in data: results.append( { "enzyme_uniprot": item.get("enzyme"), "substrate_uniprot": item.get("substrate"), "enzyme_genesymbol": item.get("enzyme_genesymbol"), "substrate_genesymbol": item.get("substrate_genesymbol"), "residue_type": item.get("residue_type"), "residue_offset": item.get("residue_offset"), "modification": item.get("modification"), "sources": item.get("sources", []), } ) return { "data": results, "metadata": { "source": "OmniPath Enzyme-Substrate (omnipathdb.org)", "total_ptms": len(results), "description": "PTM data from PhosphoSite, phosphoELM, dbPTM, SIGNOR, and other databases", }, }
[docs] def _get_tf_target_interactions(self, arguments: Dict[str, Any]) -> Dict[str, Any]: tf_gene = arguments.get("tf_gene") target_gene = arguments.get("target_gene") confidence_level = arguments.get("confidence_level") if not tf_gene and not target_gene: return {"error": "At least one of 'tf_gene' or 'target_gene' is required"} params = { "genesymbols": "yes", "fields": "sources,references,curation_effort,type", "datasets": "dorothea,collectri", } if tf_gene: params["sources"] = tf_gene if target_gene: params["targets"] = target_gene data = self._make_request("interactions/", params) if not isinstance(data, list): return {"error": f"Unexpected response format from OmniPath: {type(data)}"} interactions = [] for item in data: dorothea_level = item.get("dorothea_level") if ( confidence_level and dorothea_level and dorothea_level != confidence_level ): continue interactions.append( { "tf_uniprot": item.get("source"), "target_uniprot": item.get("target"), "tf_genesymbol": item.get("source_genesymbol"), "target_genesymbol": item.get("target_genesymbol"), "is_stimulation": bool(item.get("is_stimulation", 0)), "is_inhibition": bool(item.get("is_inhibition", 0)), "dorothea_level": dorothea_level, "sources": item.get("sources", []), "curation_effort": item.get("curation_effort"), } ) interactions.sort(key=lambda x: x.get("curation_effort") or 0, reverse=True) return { "data": interactions, "metadata": { "source": "OmniPath DoRothEA + CollecTRI (omnipathdb.org)", "total_interactions": len(interactions), "description": "TF-target interactions from DoRothEA and CollecTRI regulons", }, }
[docs] def _get_dorothea_regulon(self, arguments: Dict[str, Any]) -> Dict[str, Any]: tf_gene = arguments.get("tf_gene") if not tf_gene: return {"error": "'tf_gene' is required for DoRothEA regulon query"} confidence_levels = arguments.get("confidence_levels") params = { "genesymbols": "yes", "fields": "sources,references,curation_effort,type", "datasets": "dorothea", "sources": tf_gene, } data = self._make_request("interactions/", params) if not isinstance(data, list): return {"error": f"Unexpected response format from OmniPath: {type(data)}"} interactions = [] for item in data: dorothea_level = item.get("dorothea_level") if confidence_levels: levels = [level.strip() for level in confidence_levels.split(",")] if dorothea_level not in levels: continue interactions.append( { "tf_genesymbol": item.get("source_genesymbol"), "target_genesymbol": item.get("target_genesymbol"), "target_uniprot": item.get("target"), "mor": 1 if item.get("is_stimulation") else (-1 if item.get("is_inhibition") else 0), "is_stimulation": bool(item.get("is_stimulation", 0)), "is_inhibition": bool(item.get("is_inhibition", 0)), "dorothea_level": dorothea_level, "sources": item.get("sources", []), "curation_effort": item.get("curation_effort"), } ) interactions.sort(key=lambda x: x.get("curation_effort") or 0, reverse=True) by_level = {} for i in interactions: lvl = i.get("dorothea_level") or "unknown" by_level.setdefault(lvl, []).append(i) return { "data": interactions, "metadata": { "source": "OmniPath DoRothEA (omnipathdb.org)", "tf_gene": tf_gene, "total_targets": len(interactions), "by_confidence_level": {k: len(v) for k, v in by_level.items()}, "description": "DoRothEA regulon with mode of regulation (MoR): +1=activation, -1=repression", }, }