# Source code for tooluniverse.signor_tool

"""
SIGNOR Tool - SIGnaling Network Open Resource

SIGNOR is a database of causal relationships between biological entities
(proteins, chemicals, complexes, phenotypes) in cell signaling. Each
relationship describes how entity A affects entity B through a specific
mechanism (phosphorylation, ubiquitination, transcriptional regulation, etc.).

API: https://signor.uniroma2.it/getData.php (TSV format)
Pathways: https://signor.uniroma2.it/getPathwayData.php
Reference: Licata et al. (2020) Nucleic Acids Research
"""

import requests
from functools import lru_cache
from typing import Dict, Any, List
from .base_tool import BaseTool
from .tool_registry import register_tool

# SIGNOR endpoint queried for the interactions of a single entity.
SIGNOR_DATA_URL = "https://signor.uniroma2.it/getData.php"
# SIGNOR endpoint queried both for the pathway list and for per-pathway relations.
SIGNOR_PATHWAY_URL = "https://signor.uniroma2.it/getPathwayData.php"
# UniProtKB search endpoint used to map gene symbols to accessions.
UNIPROT_SEARCH_URL = "https://rest.uniprot.org/uniprotkb/search"


@lru_cache(maxsize=256)
def _query_uniprot_accession(gene_symbol: str, taxon_id: int) -> str:
    """Query UniProt for the reviewed accession of a gene symbol.

    Returns the primary accession of the first hit, or ``""`` when UniProt
    answers successfully but has no matching entry.

    Raises:
        Exception: on any transport, HTTP-status or decoding failure. Raising
            (instead of returning ``""``) matters because ``lru_cache`` only
            stores return values: failures are therefore never memoized and
            the next call retries the lookup.
    """
    resp = requests.get(
        UNIPROT_SEARCH_URL,
        params={
            "query": f"gene_exact:{gene_symbol} AND organism_id:{taxon_id} AND reviewed:true",
            "fields": "accession",
            "format": "json",
            "size": 1,
        },
        timeout=10,
    )
    if resp.status_code != 200:
        raise requests.HTTPError(
            f"UniProt search returned HTTP {resp.status_code}", response=resp
        )
    hits = resp.json().get("results", [])
    return hits[0].get("primaryAccession", "") if hits else ""


def _resolve_gene_to_uniprot(gene_symbol: str, taxon_id: int = 9606) -> str:
    """Resolve a gene symbol to a reviewed UniProt accession (cached per process).

    Args:
        gene_symbol: Gene symbol to look up (matched with ``gene_exact``).
        taxon_id: NCBI taxonomy identifier of the organism; defaults to 9606.

    Returns:
        The primary accession of the first reviewed hit, or ``""`` when there
        is no hit or the lookup fails. Only definitive answers are cached; a
        failed lookup is retried on the next call rather than being remembered
        as "unresolvable" for the rest of the process.
    """
    try:
        return _query_uniprot_accession(gene_symbol, taxon_id)
    except Exception:
        # Deliberately best-effort: resolution is only a fallback, so any
        # failure simply means "could not resolve".
        return ""


# Keep the cache-management helpers reachable under the original name.
_resolve_gene_to_uniprot.cache_clear = _query_uniprot_accession.cache_clear
_resolve_gene_to_uniprot.cache_info = _query_uniprot_accession.cache_info


# Positional column names for the getData.php TSV response, which carries no
# header row. Order matters: fields are assigned to these names by index.
DATA_COLUMNS = [
    # Entity A (reported as the interaction source)
    "entitya", "typea", "ida", "databasea",
    # Entity B (reported as the interaction target)
    "entityb", "typeb", "idb", "databaseb",
    # Relationship description
    "effect", "mechanism", "residue", "sequence",
    # Organism / context
    "tax_id", "cell_data", "tissue_data",
    # Complex and modification details
    "modulator_complex", "target_complex",
    "modificationa", "modaseq", "modificationb", "modbseq",
    # Evidence and curation
    "pmid", "direct", "notes", "annotator", "sentence",
    # Record identifier and score
    "signor_id", "score",
]


def _parse_tsv(
    text: str, columns: List[str], has_header: bool = False
) -> List[Dict[str, str]]:
    """Parse TSV text into list of dicts."""
    lines = text.strip().split("\n")
    if not lines:
        return []
    start = 1 if has_header else 0
    results = []
    for line in lines[start:]:
        if not line.strip():
            continue
        fields = line.split("\t")
        row = {}
        for i, col in enumerate(columns):
            row[col] = fields[i].strip() if i < len(fields) else ""
        results.append(row)
    return results


def _format_interaction(row: Dict[str, str]) -> Dict[str, Any]:
    """Convert a parsed TSV row into a structured interaction dict."""
    return {
        "source_entity": row.get("entitya", ""),
        "source_type": row.get("typea", ""),
        "source_id": row.get("ida", ""),
        "target_entity": row.get("entityb", ""),
        "target_type": row.get("typeb", ""),
        "target_id": row.get("idb", ""),
        "effect": row.get("effect", ""),
        "mechanism": row.get("mechanism", ""),
        "residue": row.get("residue", "") or None,
        "pmid": row.get("pmid", "") or None,
        "direct": row.get("direct", "") == "t",
        "score": float(row["score"]) if row.get("score") else None,
        "signor_id": row.get("signor_id", ""),
    }


@register_tool("SIGNORTool")
class SIGNORTool(BaseTool):
    """Query SIGNOR for causal signaling relationships and pathways.

    The operation is read from ``tool_config["fields"]["operation"]`` and is
    one of ``get_interactions``, ``list_pathways`` or ``get_pathway``. Every
    operation returns a dict whose ``status`` is ``"success"`` (with ``data``
    and ``metadata``) or ``"error"`` (with an ``error`` message).
    """

    # Column names for the per-pathway relations TSV (which has a header row).
    _PATHWAY_COLUMNS = [
        "pathway_id",
        "pathway_name",
        "entitya",
        "regulator_location",
        "typea",
        "ida",
        "databasea",
        "entityb",
        "target_location",
        "typeb",
        "idb",
        "databaseb",
        "effect",
        "mechanism",
        "residue",
        "sequence",
        "tax_id",
        "cell_data",
        "tissue_data",
        "modulator_complex",
        "target_complex",
        "modificationa",
        "modaseq",
        "modificationb",
        "modbseq",
        "pmid",
        "direct",
        "annotator",
        "sentence",
        "notes",
        "signor_id",
        "score",
    ]

    def __init__(self, tool_config):
        """Store the parameter schema from ``tool_config`` and open an HTTP session."""
        super().__init__(tool_config)
        self.parameter = tool_config.get("parameter", {})
        self.required = self.parameter.get("required", [])
        self.session = requests.Session()

    @staticmethod
    def _is_empty_payload(text: str) -> bool:
        """Return True when a response body carries no usable TSV data.

        That is: an empty body, the literal ``No result found.`` message, or
        a body starting with ``<!`` (an HTML page rather than TSV).
        """
        body = text.strip()
        return body in ("", "No result found.") or body.startswith("<!")

    def _get_interactions(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Get signaling interactions for a protein/entity.

        Reads ``entity_id`` (aliases: ``protein``, ``uniprot_id``),
        ``organism`` (default 9606) and ``limit`` (default 50) from ``params``.
        When SIGNOR returns nothing for the identifier as given, it is treated
        as a gene symbol, resolved to a UniProt accession and queried again.
        """
        requested = (
            params.get("entity_id")
            or params.get("protein")
            or params.get("uniprot_id")
            or ""
        )
        organism = params.get("organism", 9606)
        limit = params.get("limit", 50)

        if not requested:
            return {
                "status": "error",
                "error": "entity_id parameter is required (e.g., UniProt ID like P04637)",
            }

        resp = self.session.get(
            SIGNOR_DATA_URL,
            params={"organism": organism, "id": requested},
            timeout=30,
        )
        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"SIGNOR request failed: HTTP {resp.status_code}",
            }

        entity_id = requested
        # If no results and input looks like a gene symbol, try resolving to UniProt ID
        if self._is_empty_payload(resp.text):
            resolved = _resolve_gene_to_uniprot(requested, organism)
            if not resolved or resolved == requested:
                return {
                    "status": "error",
                    "error": f"No interactions found for '{requested}' in organism {organism}. SIGNOR requires UniProt accessions (e.g., P04637 for TP53).",
                }

            retry = self.session.get(
                SIGNOR_DATA_URL,
                params={"organism": organism, "id": resolved},
                timeout=30,
            )
            if retry.status_code != 200 or self._is_empty_payload(retry.text):
                # Report the identifier the caller actually supplied, whichever
                # alias it arrived under.
                return {
                    "status": "error",
                    "error": f"No interactions found for '{requested}' (resolved to UniProt {resolved}) in organism {organism}. SIGNOR requires UniProt accessions (e.g., P04637 for TP53).",
                }
            resp = retry
            entity_id = resolved

        rows = _parse_tsv(resp.text, DATA_COLUMNS, has_header=False)
        interactions = [_format_interaction(row) for row in rows[:limit]]
        return {
            "status": "success",
            "data": interactions,
            "metadata": {
                "entity_id": entity_id,
                "organism": organism,
                "total_interactions": len(rows),
                "returned": len(interactions),
            },
        }

    def _list_pathways(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """List all SIGNOR curated signaling pathways.

        An optional ``query`` in ``params`` keeps only pathways whose name or
        description contains it (case-insensitive). Descriptions are truncated
        to 300 characters.
        """
        resp = self.session.get(
            SIGNOR_PATHWAY_URL,
            params={"description": ""},
            timeout=30,
        )
        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"SIGNOR pathway list failed: HTTP {resp.status_code}",
            }

        cols = ["sig_id", "path_name", "path_description", "path_curator"]
        rows = _parse_tsv(resp.text, cols, has_header=True)

        # ``or ""`` tolerates an explicit ``query=None`` from the caller.
        query = str(params.get("query") or "").lower()
        if query:
            rows = [
                r
                for r in rows
                if query in r.get("path_name", "").lower()
                or query in r.get("path_description", "").lower()
            ]

        pathways = [
            {
                "pathway_id": r.get("sig_id", ""),
                "name": r.get("path_name", ""),
                "description": r.get("path_description", "")[:300] or None,
                "curator": r.get("path_curator", "") or None,
            }
            for r in rows
        ]
        return {
            "status": "success",
            "data": pathways,
            "metadata": {"total_pathways": len(pathways)},
        }

    def _get_pathway(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Get interactions belonging to a specific SIGNOR pathway.

        Reads ``pathway_id`` (required) and ``limit`` (default 100) from
        ``params``.
        """
        pathway_id = params.get("pathway_id", "")
        limit = params.get("limit", 100)

        if not pathway_id:
            return {
                "status": "error",
                "error": "pathway_id is required (e.g., SIGNOR-AD, SIGNOR-C)",
            }

        resp = self.session.get(
            SIGNOR_PATHWAY_URL,
            params={"pathway": pathway_id, "relations": "only"},
            timeout=30,
        )
        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"SIGNOR pathway request failed: HTTP {resp.status_code}",
            }
        # Same "no data" test as the interaction lookup, so a message or HTML
        # body is never parsed as if it were TSV rows.
        if self._is_empty_payload(resp.text):
            return {
                "status": "error",
                "error": f"Pathway '{pathway_id}' not found",
            }

        rows = _parse_tsv(resp.text, self._PATHWAY_COLUMNS, has_header=True)
        interactions = [_format_interaction(row) for row in rows[:limit]]
        return {
            "status": "success",
            "data": interactions,
            "metadata": {
                "pathway_id": pathway_id,
                "total_interactions": len(rows),
                "returned": len(interactions),
            },
        }

    def run(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Dispatch to the operation configured in ``tool_config``.

        Network-level failures (timeouts, connection errors) are reported as
        an error dict, like every other failure of this tool, instead of
        propagating as exceptions.
        """
        operation = self.tool_config.get("fields", {}).get("operation", "")
        handlers = {
            "get_interactions": self._get_interactions,
            "list_pathways": self._list_pathways,
            "get_pathway": self._get_pathway,
        }
        handler = handlers.get(operation)
        if handler is None:
            return {"status": "error", "error": f"Unknown operation: {operation}"}
        try:
            return handler(params)
        except requests.RequestException as exc:
            return {"status": "error", "error": f"SIGNOR request failed: {exc}"}