Source code for tooluniverse.omnipath_tool

# omnipath_tool.py
"""
OmniPath API tool for ToolUniverse.

OmniPath is the largest integrated database of intra- and intercellular
signaling knowledge. It combines data from 100+ resources including
CellPhoneDB, CellChatDB, CellTalkDB, SIGNOR, KEGG, Reactome, and more.

Provides access to:
- Ligand-receptor interactions (cell-cell communication)
- Intercellular role annotations (ligand/receptor classification)
- Signaling pathway interactions (directed, signed PPI)
- Protein complex compositions
- Cell communication annotations (from CellPhoneDB/CellChatDB/etc.)
- Enzyme-substrate (PTM) relationships

API: https://omnipathdb.org/
No authentication required. JSON format supported.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

OMNIPATH_BASE_URL = "https://omnipathdb.org"



[docs]
@register_tool("OmniPathTool")
class OmniPathTool(BaseTool):
    """
    Tool for querying OmniPath intercellular and intracellular signaling data.

    OmniPath integrates 100+ databases covering:
    - Ligand-receptor interactions (14,000+ pairs)
    - Intercellular communication roles
    - Signaling pathway interactions
    - Protein complexes (22,000+)
    - Cell communication annotations
    - Enzyme-substrate relationships

    No authentication required.
    """


[docs]
    def __init__(self, tool_config: Dict[str, Any]):
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        fields = tool_config.get("fields", {})
        self.endpoint = fields.get("endpoint", "ligand_receptor")



[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the OmniPath API call."""
        try:
            return self._query(arguments)
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"OmniPath API timed out after {self.timeout}s",
            }
        except requests.exceptions.ConnectionError:
            return {
                "status": "error",
                "error": "Failed to connect to OmniPath API at omnipathdb.org",
            }
        except requests.exceptions.HTTPError as e:
            return {
                "status": "error",
                "error": f"OmniPath API HTTP error: {e.response.status_code}",
            }
        except Exception as e:
            return {
                "status": "error",
                "error": f"Unexpected error querying OmniPath: {str(e)}",
            }



[docs]
    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to appropriate OmniPath endpoint."""
        if self.endpoint == "ligand_receptor":
            return self._get_ligand_receptor(arguments)
        elif self.endpoint == "intercell":
            return self._get_intercell(arguments)
        elif self.endpoint == "signaling":
            return self._get_signaling(arguments)
        elif self.endpoint == "complexes":
            return self._get_complexes(arguments)
        elif self.endpoint == "annotations":
            return self._get_annotations(arguments)
        elif self.endpoint == "annotation_resource_geneset":
            return self._get_annotation_resource_geneset(arguments)
        elif self.endpoint == "enz_sub":
            return self._get_enzyme_substrate(arguments)
        elif self.endpoint == "tf_target":
            return self._get_tf_target_interactions(arguments)
        elif self.endpoint == "dorothea":
            return self._get_dorothea_regulon(arguments)
        else:
            return {"status": "error", "error": f"Unknown endpoint: {self.endpoint}"}



[docs]
    def _make_request(self, path: str, params: Dict[str, Any]) -> Any:
        """Make an HTTP request to OmniPath API."""
        url = f"{OMNIPATH_BASE_URL}/{path}"
        # Always request JSON format
        params["format"] = "json"
        response = requests.get(url, params=params, timeout=self.timeout)
        # Check for OmniPath text error responses
        if response.headers.get("content-type", "").startswith("text/plain"):
            text = response.text.strip()
            if (
                "not entirely good" in text.lower()
                or "unknown argument" in text.lower()
            ):
                raise ValueError(f"OmniPath API error: {text[:200]}")
        response.raise_for_status()
        return response.json()



[docs]
    def _get_ligand_receptor(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get ligand-receptor interactions from the ligrecextra dataset."""
        params = {
            "genesymbols": "yes",
            "fields": "sources,references,curation_effort,type",
            "datasets": "ligrecextra",
        }

        # Add protein filters
        if arguments.get("partners"):
            params["partners"] = arguments["partners"]
        if arguments.get("sources"):
            params["sources"] = arguments["sources"]
        if arguments.get("targets"):
            params["targets"] = arguments["targets"]
        if arguments.get("databases"):
            params["databases"] = arguments["databases"]
        if arguments.get("organisms"):
            params["organisms"] = str(arguments["organisms"])
        if arguments.get("limit"):
            params["limit"] = str(arguments["limit"])

        # Must have at least one protein filter
        if not any(arguments.get(k) for k in ["partners", "sources", "targets"]):
            return {
                "status": "error",
                "error": "At least one of 'partners', 'sources', or 'targets' is required to query ligand-receptor interactions.",
            }

        data = self._make_request("interactions/", params)

        if not isinstance(data, list):
            return {
                "status": "error",
                "error": f"Unexpected response format from OmniPath: {type(data)}",
            }

        interactions = []
        for item in data:
            interactions.append(
                {
                    "source_uniprot": item.get("source"),
                    "target_uniprot": item.get("target"),
                    "source_genesymbol": item.get("source_genesymbol"),
                    "target_genesymbol": item.get("target_genesymbol"),
                    "is_directed": bool(item.get("is_directed", 0)),
                    "is_stimulation": bool(item.get("is_stimulation", 0)),
                    "is_inhibition": bool(item.get("is_inhibition", 0)),
                    "sources": item.get("sources", []),
                    "curation_effort": item.get("curation_effort"),
                    "type": item.get("type"),
                }
            )

        # Sort by curation effort (most curated first)
        interactions.sort(key=lambda x: x.get("curation_effort") or 0, reverse=True)

        return {
            "status": "success",
            "data": interactions,
            "metadata": {
                "source": "OmniPath (omnipathdb.org)",
                "dataset": "ligrecextra",
                "total_interactions": len(interactions),
                "description": "Ligand-receptor interactions from CellPhoneDB, CellChatDB, CellTalkDB, and 20+ databases",
            },
        }



[docs]
    def _get_intercell(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get intercellular communication role annotations."""
        params = {}

        if arguments.get("proteins"):
            params["proteins"] = arguments["proteins"]
        if arguments.get("categories"):
            params["categories"] = arguments["categories"]
        if arguments.get("scope"):
            params["scope"] = arguments["scope"]
        if arguments.get("transmitter") is not None:
            params["transmitter"] = "yes" if arguments["transmitter"] else "no"
        if arguments.get("receiver") is not None:
            params["receiver"] = "yes" if arguments["receiver"] else "no"
        if arguments.get("secreted") is not None:
            params["secreted"] = "yes" if arguments["secreted"] else "no"
        if arguments.get("limit"):
            params["limit"] = str(arguments["limit"])

        data = self._make_request("intercell/", params)

        if not isinstance(data, list):
            return {
                "status": "error",
                "error": f"Unexpected response format from OmniPath: {type(data)}",
            }

        results = []
        for item in data:
            results.append(
                {
                    "uniprot": item.get("uniprot"),
                    "genesymbol": item.get("genesymbol"),
                    "category": item.get("category"),
                    "parent": item.get("parent"),
                    "database": item.get("database"),
                    "scope": item.get("scope"),
                    "aspect": item.get("aspect"),
                    "consensus_score": item.get("consensus_score"),
                    "transmitter": item.get("transmitter", False),
                    "receiver": item.get("receiver", False),
                    "secreted": item.get("secreted", False),
                    "plasma_membrane_transmembrane": item.get(
                        "plasma_membrane_transmembrane", False
                    ),
                }
            )

        return {
            "status": "success",
            "data": results,
            "metadata": {
                "source": "OmniPath Intercell (omnipathdb.org)",
                "total_annotations": len(results),
                "description": "Intercellular communication role annotations from 40+ databases",
            },
        }



[docs]
    def _get_signaling(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get intracellular signaling pathway interactions."""
        datasets = arguments.get("datasets") or "omnipath"
        params = {
            "genesymbols": "yes",
            "fields": "sources,references,curation_effort,type",
            "datasets": datasets,
        }

        if arguments.get("partners"):
            params["partners"] = arguments["partners"]
        if arguments.get("sources"):
            params["sources"] = arguments["sources"]
        if arguments.get("targets"):
            params["targets"] = arguments["targets"]
        if arguments.get("directed") is not None:
            params["directed"] = "yes" if arguments["directed"] else "no"
        if arguments.get("signed") is not None:
            params["signed"] = "yes" if arguments["signed"] else "no"
        if arguments.get("organisms"):
            params["organisms"] = str(arguments["organisms"])
        if arguments.get("limit"):
            params["limit"] = str(arguments["limit"])

        if not any(arguments.get(k) for k in ["partners", "sources", "targets"]):
            return {
                "status": "error",
                "error": "At least one of 'partners', 'sources', or 'targets' is required to query signaling interactions.",
            }

        data = self._make_request("interactions/", params)

        if not isinstance(data, list):
            return {
                "status": "error",
                "error": f"Unexpected response format from OmniPath: {type(data)}",
            }

        interactions = []
        for item in data:
            interactions.append(
                {
                    "source_uniprot": item.get("source"),
                    "target_uniprot": item.get("target"),
                    "source_genesymbol": item.get("source_genesymbol"),
                    "target_genesymbol": item.get("target_genesymbol"),
                    "is_directed": bool(item.get("is_directed", 0)),
                    "is_stimulation": bool(item.get("is_stimulation", 0)),
                    "is_inhibition": bool(item.get("is_inhibition", 0)),
                    "consensus_direction": bool(item.get("consensus_direction", 0)),
                    "consensus_stimulation": bool(item.get("consensus_stimulation", 0)),
                    "consensus_inhibition": bool(item.get("consensus_inhibition", 0)),
                    "sources": item.get("sources", []),
                    "curation_effort": item.get("curation_effort"),
                    "type": item.get("type"),
                }
            )

        interactions.sort(key=lambda x: x.get("curation_effort") or 0, reverse=True)

        return {
            "status": "success",
            "data": interactions,
            "metadata": {
                "source": "OmniPath (omnipathdb.org)",
                "datasets": datasets,
                "total_interactions": len(interactions),
            },
        }



[docs]
    def _get_complexes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get protein complex compositions."""
        proteins = arguments.get("proteins", "")
        if not proteins:
            return {
                "status": "error",
                "error": "proteins parameter is required (UniProt accession(s), e.g., P01137)",
            }

        params = {"proteins": proteins}
        if arguments.get("databases"):
            params["databases"] = arguments["databases"]

        data = self._make_request("complexes/", params)

        if not isinstance(data, list):
            return {
                "status": "error",
                "error": f"Unexpected response format from OmniPath: {type(data)}",
            }

        complexes = []
        for item in data:
            complexes.append(
                {
                    "name": item.get("name"),
                    "components": item.get("components"),
                    "components_genesymbols": item.get("components_genesymbols"),
                    "stoichiometry": item.get("stoichiometry"),
                    "sources": item.get("sources", []),
                    "references": item.get("references"),
                    "identifiers": item.get("identifiers"),
                }
            )

        return {
            "status": "success",
            "data": complexes,
            "metadata": {
                "source": "OmniPath Complexes (omnipathdb.org)",
                "query_proteins": proteins,
                "total_complexes": len(complexes),
                "description": "Protein complexes from CORUM, CellPhoneDB, ComplexPortal, and other databases",
            },
        }



[docs]
    def _get_annotations(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get cell communication annotations from CellPhoneDB, CellChatDB, etc."""
        proteins = arguments.get("proteins", "")
        if not proteins:
            return {
                "status": "error",
                "error": "proteins parameter is required (UniProt accession(s) or gene symbol(s))",
            }

        databases = arguments.get("databases") or "CellPhoneDB,CellChatDB"

        params = {
            "proteins": proteins,
            "databases": databases,
        }

        data = self._make_request("annotations/", params)

        if not isinstance(data, list):
            return {
                "status": "error",
                "error": f"Unexpected response format from OmniPath: {type(data)}",
            }

        annotations = []
        for item in data:
            annotations.append(
                {
                    "uniprot": item.get("uniprot"),
                    "genesymbol": item.get("genesymbol"),
                    "entity_type": item.get("entity_type"),
                    "source": item.get("source"),
                    "label": item.get("label"),
                    "value": item.get("value"),
                    "record_id": item.get("record_id"),
                }
            )

        return {
            "status": "success",
            "data": annotations,
            "metadata": {
                "source": "OmniPath Annotations (omnipathdb.org)",
                "databases_queried": databases,
                "total_annotations": len(annotations),
            },
        }



[docs]
    def _get_annotation_resource_geneset(
        self, arguments: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Retrieve the full annotated gene/protein SET for an OmniPath annotation
        resource, with NO protein filter (resource-wide reverse lookup).

        Calls /annotations?resources=<X> with no 'proteins' argument, returning the
        entire membership of any OmniPath annotation resource (80+ available), e.g.
        all cell-surface proteins (Surfaceome), all kinases (kinase.com), all
        transcription factors (TFcensus), all phosphatases (Phosphatome), or all
        CancerGeneCensus driver genes. Records are returned in long format (one
        label/value pair per row); this method also groups them per protein.
        """
        resource = arguments.get("resource") or arguments.get("resources")
        if not resource:
            return {
                "status": "error",
                "error": (
                    "'resource' is required: the OmniPath annotation resource whose "
                    "full gene/protein set to retrieve (e.g., 'CancerGeneCensus', "
                    "'Surfaceome', 'TFcensus', 'kinase.com', 'Phosphatome')."
                ),
            }

        params = {"resources": resource}
        if arguments.get("entity_types"):
            params["entity_types"] = arguments["entity_types"]

        data = self._make_request("annotations/", params)

        if not isinstance(data, list):
            return {
                "status": "error",
                "error": f"Unexpected response format from OmniPath: {type(data)}",
            }

        # Long-format rows -> group label/value pairs per (uniprot, genesymbol)
        grouped: Dict[Any, Dict[str, Any]] = {}
        order = []
        for item in data:
            uniprot = item.get("uniprot")
            genesymbol = item.get("genesymbol")
            key = (uniprot, genesymbol)
            if key not in grouped:
                grouped[key] = {
                    "uniprot": uniprot,
                    "genesymbol": genesymbol,
                    "entity_type": item.get("entity_type"),
                    "annotations": {},
                }
                order.append(key)
            label = item.get("label")
            value = item.get("value")
            if label is not None:
                grouped[key]["annotations"][label] = value

        members = [grouped[k] for k in order]

        if not members:
            return {
                "status": "success",
                "data": [],
                "metadata": {
                    "source": "OmniPath Annotations (omnipathdb.org)",
                    "resource": resource,
                    "total_members": 0,
                    "total_records": 0,
                    "note": (
                        f"No annotations returned for resource '{resource}'. The "
                        "resource name may be misspelled or unavailable. Verify the "
                        "exact name via https://omnipathdb.org/queries/annotations "
                        "(examples: CancerGeneCensus, Surfaceome, TFcensus, "
                        "kinase.com, Phosphatome, TFcensus)."
                    ),
                },
            }

        return {
            "status": "success",
            "data": members,
            "metadata": {
                "source": "OmniPath Annotations (omnipathdb.org)",
                "resource": resource,
                "total_members": len(members),
                "total_records": len(data),
                "description": (
                    "Full annotated gene/protein set (resource-wide membership) for "
                    f"the OmniPath annotation resource '{resource}'. Each member lists "
                    "its label/value annotations from that resource."
                ),
            },
        }



[docs]
    def _get_enzyme_substrate(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get enzyme-substrate (PTM) interactions."""
        params = {
            "genesymbols": "yes",
            "fields": "sources,references",
        }

        if arguments.get("enzymes"):
            params["enzymes"] = arguments["enzymes"]
        if arguments.get("substrates"):
            params["substrates"] = arguments["substrates"]
        if arguments.get("types"):
            params["types"] = arguments["types"]
        if arguments.get("organisms"):
            params["organisms"] = str(arguments["organisms"])
        if arguments.get("limit"):
            params["limit"] = str(arguments["limit"])

        if not any(arguments.get(k) for k in ["enzymes", "substrates"]):
            return {
                "status": "error",
                "error": "At least one of 'enzymes' or 'substrates' is required.",
            }

        data = self._make_request("enz_sub/", params)

        if not isinstance(data, list):
            return {
                "status": "error",
                "error": f"Unexpected response format from OmniPath: {type(data)}",
            }

        results = []
        for item in data:
            results.append(
                {
                    "enzyme_uniprot": item.get("enzyme"),
                    "substrate_uniprot": item.get("substrate"),
                    "enzyme_genesymbol": item.get("enzyme_genesymbol"),
                    "substrate_genesymbol": item.get("substrate_genesymbol"),
                    "residue_type": item.get("residue_type"),
                    "residue_offset": item.get("residue_offset"),
                    "modification": item.get("modification"),
                    "sources": item.get("sources", []),
                }
            )

        return {
            "status": "success",
            "data": results,
            "metadata": {
                "source": "OmniPath Enzyme-Substrate (omnipathdb.org)",
                "total_ptms": len(results),
                "description": "PTM data from PhosphoSite, phosphoELM, dbPTM, SIGNOR, and other databases",
            },
        }



[docs]
    def _get_tf_target_interactions(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        tf_gene = arguments.get("tf_gene")
        target_gene = arguments.get("target_gene")
        confidence_level = arguments.get("confidence_level")

        if not tf_gene and not target_gene:
            return {
                "status": "error",
                "error": "At least one of 'tf_gene' or 'target_gene' is required",
            }

        params = {
            "genesymbols": "yes",
            "fields": "sources,references,curation_effort,type",
            "datasets": "dorothea,collectri",
        }

        if tf_gene:
            params["sources"] = tf_gene
        if target_gene:
            params["targets"] = target_gene

        data = self._make_request("interactions/", params)

        if not isinstance(data, list):
            return {
                "status": "error",
                "error": f"Unexpected response format from OmniPath: {type(data)}",
            }

        interactions = []
        for item in data:
            dorothea_level = item.get("dorothea_level")
            if (
                confidence_level
                and dorothea_level
                and dorothea_level != confidence_level
            ):
                continue

            interactions.append(
                {
                    "tf_uniprot": item.get("source"),
                    "target_uniprot": item.get("target"),
                    "tf_genesymbol": item.get("source_genesymbol"),
                    "target_genesymbol": item.get("target_genesymbol"),
                    "is_stimulation": bool(item.get("is_stimulation", 0)),
                    "is_inhibition": bool(item.get("is_inhibition", 0)),
                    "dorothea_level": dorothea_level,
                    "sources": item.get("sources", []),
                    "curation_effort": item.get("curation_effort"),
                }
            )

        interactions.sort(key=lambda x: x.get("curation_effort") or 0, reverse=True)

        return {
            "status": "success",
            "data": interactions,
            "metadata": {
                "source": "OmniPath DoRothEA + CollecTRI (omnipathdb.org)",
                "total_interactions": len(interactions),
                "description": "TF-target interactions from DoRothEA and CollecTRI regulons",
            },
        }



[docs]
    def _get_dorothea_regulon(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        tf_gene = arguments.get("tf_gene")
        if not tf_gene:
            return {
                "status": "error",
                "error": "'tf_gene' is required for DoRothEA regulon query",
            }

        confidence_levels = arguments.get("confidence_levels")

        params = {
            "genesymbols": "yes",
            "fields": "sources,references,curation_effort,type",
            "datasets": "dorothea",
            "sources": tf_gene,
        }

        data = self._make_request("interactions/", params)

        if not isinstance(data, list):
            return {
                "status": "error",
                "error": f"Unexpected response format from OmniPath: {type(data)}",
            }

        interactions = []
        for item in data:
            dorothea_level = item.get("dorothea_level")

            if confidence_levels:
                levels = [level.strip() for level in confidence_levels.split(",")]
                if dorothea_level not in levels:
                    continue

            interactions.append(
                {
                    "tf_genesymbol": item.get("source_genesymbol"),
                    "target_genesymbol": item.get("target_genesymbol"),
                    "target_uniprot": item.get("target"),
                    "mor": 1
                    if item.get("is_stimulation")
                    else (-1 if item.get("is_inhibition") else 0),
                    "is_stimulation": bool(item.get("is_stimulation", 0)),
                    "is_inhibition": bool(item.get("is_inhibition", 0)),
                    "dorothea_level": dorothea_level,
                    "sources": item.get("sources", []),
                    "curation_effort": item.get("curation_effort"),
                }
            )

        interactions.sort(key=lambda x: x.get("curation_effort") or 0, reverse=True)

        by_level = {}
        for i in interactions:
            lvl = i.get("dorothea_level") or "unknown"
            by_level.setdefault(lvl, []).append(i)

        return {
            "status": "success",
            "data": interactions,
            "metadata": {
                "source": "OmniPath DoRothEA (omnipathdb.org)",
                "tf_gene": tf_gene,
                "total_targets": len(interactions),
                "by_confidence_level": {k: len(v) for k, v in by_level.items()},
                "description": "DoRothEA regulon with mode of regulation (MoR): +1=activation, -1=repression",
            },
        }