tooluniverse.pathwaycommons_tool 源代码

"""
PathwayCommons Tool - Unified Pathway and Interaction Database

Provides access to Pathway Commons REST API (PC2) for searching pathways,
retrieving pathway details, and querying gene interaction neighborhoods
across 22 integrated pathway/interaction databases including Reactome,
KEGG, WikiPathways, PID, BioGRID, IntAct, and more.

API base: https://www.pathwaycommons.org/pc2/
No authentication required.

Reference: Cerami et al., Nucleic Acids Res. 2011; Rodchenkov et al., Nucleic Acids Res. 2020
"""

import requests
from typing import Dict, Any, Optional

from .base_tool import BaseTool
from .tool_registry import register_tool


# Root URL of the Pathway Commons PC2 REST API. Endpoint names such as
# "/traverse" and "/graph" are appended to this value when requests are built.
PC2_BASE_URL = "https://www.pathwaycommons.org/pc2"


@register_tool("PathwayCommonsTool")
class PathwayCommonsTool(BaseTool):
    """
    Tool for querying Pathway Commons (PC2) unified pathway/interaction database.

    Supported operations:
    - search: Search for pathways, interactions, or molecular entities by gene/keyword
    - get_pathway: Get pathway details by URI
    - get_neighborhood: Get interaction neighborhood for a gene (SIF format)

    Every operation returns a dict with a ``status`` key (``"success"`` or
    ``"error"``); errors carry a human-readable ``error`` message instead of
    raising.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the tool configuration and prepare a reusable HTTP session.

        Args:
            tool_config: Tool configuration dict. Its optional ``parameter``
                entry is kept on ``self.parameter`` and that entry's
                ``required`` list on ``self.required``.
        """
        super().__init__(tool_config)
        self.parameter = tool_config.get("parameter", {})
        self.required = self.parameter.get("required", [])
        self.session = requests.Session()
        # Timeout (seconds) passed to every HTTP request made by this tool.
        self.timeout = 120

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Dispatch a request to the handler for the requested operation.

        The operation name is taken from ``arguments["operation"]``, then from
        the tool config's ``fields.operation``, then from
        ``self.get_schema_const_operation()``.

        Args:
            arguments: Operation name plus the operation-specific parameters.

        Returns:
            The handler's result dict, or ``{"status": "error", "error": ...}``
            when the operation is missing/unknown or the handler raises.
        """
        operation = (
            arguments.get("operation")
            or self.tool_config.get("fields", {}).get("operation")
            or self.get_schema_const_operation()
        )
        if not operation:
            return {"status": "error", "error": "Missing required parameter: operation"}

        # Handlers are looked up by name rather than as bound methods so that
        # one unavailable handler cannot break dispatch of the others.
        # NOTE(review): `_search` is not defined in the portion of the class
        # visible here -- confirm it is provided elsewhere.
        handlers = {
            "search": "_search",
            "get_pathway": "_get_pathway",
            "get_neighborhood": "_get_neighborhood",
        }
        method_name = handlers.get(operation)
        if not method_name:
            return {
                "status": "error",
                "error": "Unknown operation: {}. Available: {}".format(
                    operation, list(handlers.keys())
                ),
            }
        handler = getattr(self, method_name, None)
        if handler is None:
            return {
                "status": "error",
                "error": "Operation not implemented: {}".format(operation),
            }

        try:
            return handler(arguments)
        except requests.exceptions.Timeout:
            return {"status": "error", "error": "PathwayCommons API request timed out"}
        except requests.exceptions.ConnectionError:
            return {
                "status": "error",
                "error": "Failed to connect to PathwayCommons API",
            }
        except Exception as e:
            # Top-level boundary: convert any remaining failure (bad JSON,
            # invalid argument types, ...) into a structured error result.
            return {
                "status": "error",
                "error": "PathwayCommons error: {}".format(str(e)),
            }

    def _traverse(self, uri: str, path: str) -> Optional[list]:
        """Call the PC2 ``traverse`` endpoint for one URI and property path.

        Args:
            uri: URI of the object to inspect.
            path: Property path expression sent as the ``path`` parameter.

        Returns:
            The ``value`` list of the first ``traverseEntry`` in the JSON
            response, an empty list when the response has no entries, or
            ``None`` when the HTTP status is not 200.
        """
        params = {"uri": uri, "path": path}
        resp = self.session.get(
            "{}/traverse".format(PC2_BASE_URL), params=params, timeout=self.timeout
        )
        if resp.status_code != 200:
            return None
        entries = resp.json().get("traverseEntry", [])
        if not entries:
            return []
        return entries[0].get("value", [])

    def _get_pathway(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get pathway details by URI from PathwayCommons using the traverse API.

        Args:
            arguments: Must contain ``uri``.

        Returns:
            On success, ``data`` holds the pathway summary (name, description,
            organism, data source), its sub-pathway names and participant
            names; ``metadata`` holds the corresponding counts. An error dict
            is returned when ``uri`` is missing or the name lookup fails.
        """
        uri = arguments.get("uri")
        if not uri:
            return {"status": "error", "error": "Missing required parameter: uri"}

        # The name lookup doubles as the existence check: bail out before
        # issuing the remaining five requests if it already failed.
        name = self._traverse(uri, "Pathway/displayName")
        if name is None:
            return {
                "status": "error",
                "error": "Failed to retrieve pathway data for URI: {}".format(uri),
            }

        comment = self._traverse(uri, "Pathway/comment")
        organism = self._traverse(uri, "Pathway/organism/displayName")
        data_source = self._traverse(uri, "Pathway/dataSource/displayName")
        # A failed (None) lookup for the list-valued properties is reported as
        # an empty list rather than failing the whole request.
        sub_pathways = (
            self._traverse(uri, "Pathway/pathwayComponent:Pathway/displayName") or []
        )
        participants = (
            self._traverse(uri, "Pathway/pathwayComponent/participant/displayName")
            or []
        )

        def first(values):
            # First value of a traverse result, or None when empty/failed.
            return values[0] if values else None

        return {
            "status": "success",
            "data": {
                "pathway": {
                    "uri": uri,
                    "name": first(name),
                    "description": first(comment),
                    "organism": first(organism),
                    "data_source": first(data_source),
                },
                "sub_pathways": sub_pathways,
                "participants": participants,
            },
            "metadata": {
                "uri": uri,
                "sub_pathway_count": len(sub_pathways),
                "participant_count": len(participants),
            },
        }

    @staticmethod
    def _parse_sif_edges(text: str) -> list:
        """Parse the edge rows of a tab-separated SIF-style text response.

        The first line is treated as the column header and skipped. Parsing
        stops at the first blank line so that any further section of the
        response (e.g. a participant/node table) is not mistaken for edges.
        Rows with fewer than three columns are ignored.
        """
        edges = []
        optional_columns = (
            ("data_source", 3),
            ("pubmed_ids", 4),
            ("pathway_names", 5),
        )
        for raw_row in text.split("\n")[1:]:
            row = raw_row.rstrip("\r")  # tolerate CRLF line endings
            if not row.strip():
                break
            fields = row.split("\t")
            if len(fields) < 3:
                continue
            edge = {
                "participant_a": fields[0],
                "interaction_type": fields[1],
                "participant_b": fields[2],
            }
            # Optional columns are included only when present; empty -> None.
            for key, index in optional_columns:
                if len(fields) > index:
                    edge[key] = fields[index] or None
            edges.append(edge)
        return edges

    def _get_neighborhood(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get interaction neighborhood for a gene from PathwayCommons (SIF format).

        Args:
            arguments: ``gene`` (or ``source``) is required. Optional keys:
                ``limit`` (sent to the API, default 1), ``datasource`` (sent
                to the API when given) and ``max_results`` (maximum number of
                interactions returned, default 100; ``None`` means default,
                negative values are treated as 0).

        Returns:
            On success, ``data`` holds the (possibly truncated) interactions,
            per-type counts and the number of unique partners computed over
            all parsed interactions; ``metadata`` holds total/returned counts.
            An error dict is returned when the gene is missing or the API
            answers with a non-200 status.
        """
        gene = arguments.get("gene") or arguments.get("source")
        if not gene:
            return {"status": "error", "error": "Missing required parameter: gene"}

        limit = arguments.get("limit", 1)
        datasource = arguments.get("datasource")
        max_return = arguments.get("max_results")
        if max_return is None:
            max_return = 100
        # Clamp so that a negative value cannot turn into a "drop from the
        # end" slice or a negative `returned` count.
        max_return = max(int(max_return), 0)

        params = {
            "source": gene,
            "kind": "neighborhood",
            "format": "TXT",
            "limit": limit,
        }
        if datasource:
            params["datasource"] = datasource

        response = self.session.get(
            "{}/graph".format(PC2_BASE_URL), params=params, timeout=self.timeout
        )
        if response.status_code != 200:
            return {
                "status": "error",
                "error": "PathwayCommons graph returned HTTP {}".format(
                    response.status_code
                ),
            }

        text = response.text.strip()
        if not text:
            return {
                "status": "success",
                "data": {"interactions": [], "gene": gene},
                "metadata": {"gene": gene, "interaction_count": 0},
            }

        interactions = self._parse_sif_edges(text)

        # Collect unique interaction types and partners (case-insensitive
        # comparison against the query gene, computed once).
        gene_key = str(gene).upper()
        interaction_types = {}
        partners = set()
        for ix in interactions:
            itype = ix["interaction_type"]
            interaction_types[itype] = interaction_types.get(itype, 0) + 1
            for participant in (ix["participant_a"], ix["participant_b"]):
                if participant.upper() != gene_key:
                    partners.add(participant)

        returned = interactions[:max_return]
        return {
            "status": "success",
            "data": {
                "gene": gene,
                "interactions": returned,
                "interaction_type_counts": interaction_types,
                "unique_partners": len(partners),
            },
            "metadata": {
                "gene": gene,
                "total_interactions": len(interactions),
                "returned": len(returned),
                "limit": limit,
            },
        }