# Source code for tooluniverse.signor_tool
"""
SIGNOR Tool - SIGnaling Network Open Resource
SIGNOR is a database of causal relationships between biological entities
(proteins, chemicals, complexes, phenotypes) in cell signaling. Each
relationship describes how entity A affects entity B through a specific
mechanism (phosphorylation, ubiquitination, transcriptional regulation, etc.).
API: https://signor.uniroma2.it/getData.php (TSV format)
Pathways: https://signor.uniroma2.it/getPathwayData.php
Reference: Licata et al. (2020) Nucleic Acids Research
"""
import requests
from functools import lru_cache
from typing import Dict, Any, List
from .base_tool import BaseTool
from .tool_registry import register_tool
# SIGNOR endpoint queried for the relations of a single entity
# (used by SIGNORTool._get_interactions).
SIGNOR_DATA_URL = "https://signor.uniroma2.it/getData.php"
# SIGNOR endpoint queried both for the pathway list and for the relations of
# one pathway (used by SIGNORTool._list_pathways and SIGNORTool._get_pathway).
SIGNOR_PATHWAY_URL = "https://signor.uniroma2.it/getPathwayData.php"
# UniProtKB search endpoint used to resolve a gene symbol to an accession.
UNIPROT_SEARCH_URL = "https://rest.uniprot.org/uniprotkb/search"
@lru_cache(maxsize=256)
def _lookup_uniprot_accession(gene_symbol: str, taxon_id: int) -> str:
    """Ask UniProt once for the reviewed accession of a gene symbol.

    Returns:
        The ``primaryAccession`` of the first hit, or ``""`` when UniProt
        answered successfully but reported no match.

    Raises:
        requests.RequestException: on transport errors or a non-200 status.
        ValueError: when the response body is not valid JSON.

    Failures are raised rather than returned on purpose: ``lru_cache`` only
    stores return values, so a transient outage is never memoized.
    """
    resp = requests.get(
        UNIPROT_SEARCH_URL,
        params={
            "query": f"gene_exact:{gene_symbol} AND organism_id:{taxon_id} AND reviewed:true",
            "fields": "accession",
            "format": "json",
            "size": 1,
        },
        timeout=10,
    )
    if resp.status_code != 200:
        raise requests.HTTPError(f"UniProt search returned HTTP {resp.status_code}")
    hits = resp.json().get("results") or []
    if not hits:
        return ""
    return hits[0].get("primaryAccession") or ""


def _resolve_gene_to_uniprot(gene_symbol: str, taxon_id: int = 9606) -> str:
    """Resolve a gene symbol to a reviewed UniProt accession (cached per process).

    Args:
        gene_symbol: Gene symbol to look up (e.g. ``"TP53"``).
        taxon_id: NCBI taxonomy id used to restrict the search (default 9606).

    Returns:
        The UniProt accession, or ``""`` when the symbol is blank, nothing
        matches, or the lookup fails. This function never raises for lookup
        problems; it is a best-effort helper.

    Only genuine UniProt answers (a hit or a confirmed miss) are cached.
    Network errors, non-200 responses and malformed payloads are retried on
    the next call instead of being remembered as "not found".
    """
    symbol = str(gene_symbol or "").strip()
    if not symbol:
        # Nothing to search for; avoid a pointless HTTP round trip.
        return ""
    try:
        return _lookup_uniprot_accession(symbol, taxon_id)
    except (
        requests.RequestException,  # timeouts, connection errors, non-200
        ValueError,  # body is not JSON
        TypeError,  # unexpected payload shape / unhashable argument
        AttributeError,  # payload is not a mapping
        LookupError,  # payload "results" is not a sequence of mappings
    ):
        return ""


# Keep the cache-management helpers reachable through the public name, as they
# were when the decorator sat directly on this function.
_resolve_gene_to_uniprot.cache_clear = _lookup_uniprot_accession.cache_clear
_resolve_gene_to_uniprot.cache_info = _lookup_uniprot_accession.cache_info
# Positional column names for the header-less TSV returned by getData.php.
# Rows are mapped to these names by position, so the order here must mirror
# the field order of the response exactly.
DATA_COLUMNS = [
    # Entity A (exposed as source_* by _format_interaction)
    "entitya", "typea", "ida", "databasea",
    # Entity B (exposed as target_* by _format_interaction)
    "entityb", "typeb", "idb", "databaseb",
    # Relation description
    "effect", "mechanism", "residue", "sequence",
    # Organism / context columns
    "tax_id", "cell_data", "tissue_data",
    # Complex columns
    "modulator_complex", "target_complex",
    # Modification columns for entity A and entity B
    "modificationa", "modaseq", "modificationb", "modbseq",
    # Evidence and curation columns
    "pmid", "direct", "notes", "annotator", "sentence",
    # Record identifier and score
    "signor_id", "score",
]
def _parse_tsv(
    text: str, columns: List[str], has_header: bool = False
) -> List[Dict[str, str]]:
    """Parse TSV text into a list of dicts keyed by ``columns``.

    Args:
        text: Raw tab-separated payload; may be empty.
        columns: Column names, assigned to fields by position.
        has_header: When true, the first non-blank line is discarded.

    Returns:
        One dict per non-blank data line. Every dict has exactly the keys in
        ``columns``; missing trailing fields become ``""`` and surplus fields
        are ignored. Field values are whitespace-stripped.
    """
    if not text:
        return []

    # Split first and filter blank lines afterwards. Stripping the whole
    # payload up front would also remove a leading tab, shifting every field
    # of the first row one column to the left when its first field is empty.
    lines = [line for line in text.split("\n") if line.strip()]
    if has_header:
        lines = lines[1:]

    width = len(columns)
    rows = []
    for line in lines:
        # Per-field strip also removes the "\r" left behind by CRLF endings.
        fields = [field.strip() for field in line.split("\t")]
        # Pad short rows; zip() below drops any surplus fields.
        fields += [""] * (width - len(fields))
        rows.append(dict(zip(columns, fields)))
    return rows
def _format_interaction(row: Dict[str, str]) -> Dict[str, Any]:
    """Convert a parsed TSV row into a structured interaction dict.

    Args:
        row: Mapping of column name to raw string value, as produced by
            ``_parse_tsv``. Missing keys are tolerated.

    Returns:
        A dict with ``source_*`` / ``target_*`` fields (from the ``*a`` /
        ``*b`` columns), ``effect``, ``mechanism``, ``signor_id``, and:

        * ``residue`` / ``pmid``: the raw value, or ``None`` when empty;
        * ``direct``: ``True`` when the flag column is truthy;
        * ``score``: the value as ``float``, or ``None`` when empty or not
          parseable as a number.
    """
    raw_score = (row.get("score") or "").strip()
    try:
        score = float(raw_score) if raw_score else None
    except ValueError:
        # One malformed score must not abort formatting of the whole response.
        score = None

    # "t" is the spelling this module has always matched. "true" / "yes" are
    # accepted as well in case the export spells the flag out.
    # NOTE(review): confirm the exact flag values against the live API.
    direct_flag = (row.get("direct") or "").strip().lower()

    return {
        "source_entity": row.get("entitya", ""),
        "source_type": row.get("typea", ""),
        "source_id": row.get("ida", ""),
        "target_entity": row.get("entityb", ""),
        "target_type": row.get("typeb", ""),
        "target_id": row.get("idb", ""),
        "effect": row.get("effect", ""),
        "mechanism": row.get("mechanism", ""),
        "residue": row.get("residue", "") or None,
        "pmid": row.get("pmid", "") or None,
        "direct": direct_flag in ("t", "true", "yes"),
        "score": score,
        "signor_id": row.get("signor_id", ""),
    }
# [docs]
@register_tool("SIGNORTool")
class SIGNORTool(BaseTool):
    """Query SIGNOR for causal signaling relationships and pathways.

    The operation to perform is read from ``tool_config["fields"]["operation"]``
    and must be one of ``get_interactions``, ``list_pathways`` or
    ``get_pathway``. Every operation returns a dict with ``"status"`` set to
    ``"success"`` (plus ``"data"`` and ``"metadata"``) or ``"error"`` (plus an
    ``"error"`` message); HTTP transport failures are reported the same way
    instead of being raised.
    """

    def __init__(self, tool_config):
        """Store the tool configuration and open a reusable HTTP session."""
        super().__init__(tool_config)
        self.parameter = tool_config.get("parameter", {})
        self.required = self.parameter.get("required", [])
        self.session = requests.Session()

    @staticmethod
    def _is_empty_response(text: str) -> bool:
        """Return True when a SIGNOR response body carries no TSV data.

        Covers an empty body, the literal "No result found." sentinel, and an
        HTML page (body starting with ``<!``) returned in place of TSV.
        """
        body = (text or "").strip()
        return body in ("", "No result found.") or body.startswith("<!")

    @staticmethod
    def _normalize_limit(value: Any, default: int) -> int:
        """Coerce a user-supplied ``limit`` to a non-negative int.

        ``None`` or a non-numeric value falls back to ``default``; a negative
        value is clamped to 0 so it cannot silently slice rows off the end.
        """
        if value is None:
            return default
        try:
            limit = int(value)
        except (TypeError, ValueError):
            return default
        return max(limit, 0)

    def _get_interactions(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Get signaling interactions for a protein/entity.

        Reads ``entity_id`` (aliases: ``protein``, ``uniprot_id``),
        ``organism`` (default 9606) and ``limit`` (default 50) from ``params``.
        If SIGNOR has no data for the identifier as given, it is treated as a
        gene symbol, resolved to a UniProt accession, and the query is retried
        once with that accession.
        """
        requested = (
            params.get("entity_id")
            or params.get("protein")
            or params.get("uniprot_id")
            or ""
        )
        organism = params.get("organism") or 9606
        limit = self._normalize_limit(params.get("limit"), 50)

        if not requested:
            return {
                "status": "error",
                "error": "entity_id parameter is required (e.g., UniProt ID like P04637)",
            }

        entity_id = requested
        resp = self.session.get(
            SIGNOR_DATA_URL,
            params={"organism": organism, "id": entity_id},
            timeout=30,
        )
        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"SIGNOR request failed: HTTP {resp.status_code}",
            }

        if self._is_empty_response(resp.text):
            # No data for the identifier as given: it may be a gene symbol.
            resolved = _resolve_gene_to_uniprot(entity_id, organism)
            if not resolved or resolved == entity_id:
                return {
                    "status": "error",
                    "error": f"No interactions found for '{entity_id}' in organism {organism}. SIGNOR requires UniProt accessions (e.g., P04637 for TP53).",
                }

            retry = self.session.get(
                SIGNOR_DATA_URL,
                params={"organism": organism, "id": resolved},
                timeout=30,
            )
            if retry.status_code != 200 or self._is_empty_response(retry.text):
                # Report the identifier the caller actually supplied, whichever
                # alias it arrived under.
                return {
                    "status": "error",
                    "error": f"No interactions found for '{requested}' (resolved to UniProt {resolved}) in organism {organism}. SIGNOR requires UniProt accessions (e.g., P04637 for TP53).",
                }
            resp = retry
            entity_id = resolved

        rows = _parse_tsv(resp.text, DATA_COLUMNS, has_header=False)
        interactions = [_format_interaction(row) for row in rows[:limit]]
        return {
            "status": "success",
            "data": interactions,
            "metadata": {
                "entity_id": entity_id,
                "organism": organism,
                "total_interactions": len(rows),
                "returned": len(interactions),
            },
        }

    def _list_pathways(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """List all SIGNOR curated signaling pathways.

        An optional ``query`` in ``params`` keeps only pathways whose name or
        description contains it (case-insensitive). Descriptions are truncated
        to 300 characters.
        """
        resp = self.session.get(
            SIGNOR_PATHWAY_URL,
            params={"description": ""},
            timeout=30,
        )
        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"SIGNOR pathway list failed: HTTP {resp.status_code}",
            }
        if self._is_empty_response(resp.text):
            # Avoid parsing a sentinel or HTML page as if it were TSV rows.
            return {
                "status": "error",
                "error": "SIGNOR pathway list returned no data",
            }

        cols = ["sig_id", "path_name", "path_description", "path_curator"]
        rows = _parse_tsv(resp.text, cols, has_header=True)

        # `query` may be absent or explicitly None; both mean "no filter".
        query = str(params.get("query") or "").lower()
        if query:
            rows = [
                r
                for r in rows
                if query in r.get("path_name", "").lower()
                or query in r.get("path_description", "").lower()
            ]

        pathways = [
            {
                "pathway_id": r.get("sig_id", ""),
                "name": r.get("path_name", ""),
                "description": r.get("path_description", "")[:300] or None,
                "curator": r.get("path_curator", "") or None,
            }
            for r in rows
        ]
        return {
            "status": "success",
            "data": pathways,
            "metadata": {"total_pathways": len(pathways)},
        }

    def _get_pathway(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Get interactions belonging to a specific SIGNOR pathway.

        Reads ``pathway_id`` (required) and ``limit`` (default 100) from
        ``params``.
        """
        pathway_id = params.get("pathway_id", "")
        limit = self._normalize_limit(params.get("limit"), 100)
        if not pathway_id:
            return {
                "status": "error",
                "error": "pathway_id is required (e.g., SIGNOR-AD, SIGNOR-C)",
            }

        resp = self.session.get(
            SIGNOR_PATHWAY_URL,
            params={"pathway": pathway_id, "relations": "only"},
            timeout=30,
        )
        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"SIGNOR pathway request failed: HTTP {resp.status_code}",
            }
        # Same no-data detection as for entity queries.
        # NOTE(review): presumably this endpoint uses the same sentinel
        # responses as getData.php; confirm against the live API.
        if self._is_empty_response(resp.text):
            return {
                "status": "error",
                "error": f"Pathway '{pathway_id}' not found",
            }

        # Pathway relations come with a header row; columns are mapped by
        # position, so this order must mirror the response.
        path_cols = [
            "pathway_id",
            "pathway_name",
            "entitya",
            "regulator_location",
            "typea",
            "ida",
            "databasea",
            "entityb",
            "target_location",
            "typeb",
            "idb",
            "databaseb",
            "effect",
            "mechanism",
            "residue",
            "sequence",
            "tax_id",
            "cell_data",
            "tissue_data",
            "modulator_complex",
            "target_complex",
            "modificationa",
            "modaseq",
            "modificationb",
            "modbseq",
            "pmid",
            "direct",
            "annotator",
            "sentence",
            "notes",
            "signor_id",
            "score",
        ]
        rows = _parse_tsv(resp.text, path_cols, has_header=True)
        interactions = [_format_interaction(row) for row in rows[:limit]]
        return {
            "status": "success",
            "data": interactions,
            "metadata": {
                "pathway_id": pathway_id,
                "total_interactions": len(rows),
                "returned": len(interactions),
            },
        }

    def run(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Dispatch to the operation named in the tool configuration.

        Args:
            params: Operation arguments; ``None`` is treated as no arguments.

        Returns:
            The operation's result dict, or an error dict when the operation
            is unknown or the HTTP request could not be completed.
        """
        # NOTE(review): self.tool_config is presumably set by BaseTool.__init__.
        operation = self.tool_config.get("fields", {}).get("operation", "")
        handlers = {
            "get_interactions": self._get_interactions,
            "list_pathways": self._list_pathways,
            "get_pathway": self._get_pathway,
        }
        handler = handlers.get(operation)
        if handler is None:
            return {"status": "error", "error": f"Unknown operation: {operation}"}
        try:
            return handler(params or {})
        except requests.RequestException as exc:
            # Timeouts / connection failures follow the same error contract as
            # non-200 responses rather than propagating to the caller.
            return {"status": "error", "error": f"SIGNOR request failed: {exc}"}