# Source code for tooluniverse.appris_tool
"""APPRIS - Principal Isoform Database tool.
APPRIS annotates alternative splice isoforms and selects principal isoforms
for vertebrate genomes. It uses functional, structural, and conservation
information to label principal and alternative isoforms.
API docs: https://apprisws.bioinfo.cnio.es/
"""
from __future__ import annotations
from typing import Any, Dict
import requests
from .base_tool import BaseTool
from .http_utils import request_with_retry
from .tool_registry import register_tool
# [docs]
@register_tool("APPRISTool")
class APPRISTool(BaseTool):
    """Tool for APPRIS principal isoform annotation queries.

    The operation to execute is read from ``tool_config["fields"]["operation"]``
    and defaults to ``"get_isoforms"``. Supported operations are
    ``get_isoforms``, ``get_principal`` and ``get_functional_annotations``.

    Every public result is a dict with a ``"status"`` key that is either
    ``"success"`` (with ``"data"`` and ``"metadata"``) or ``"error"``
    (with an ``"error"`` message and, for HTTP failures, a ``"detail"``).
    """

    BASE_URL = "https://apprisws.bioinfo.cnio.es/rest"

    # Available analysis methods in APPRIS
    VALID_METHODS = {
        "appris",
        "firestar",
        "matador3d",
        "spade",
        "corsair",
        "thump",
        "crash",
        "proteo",
        "tsl",
    }

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Dispatch to the operation configured for this tool instance.

        Args:
            arguments: Caller-supplied parameters (``gene_id``, optional
                ``species``, ``methods``, ``transcript_id``). ``None`` is
                treated as an empty dict.

        Returns:
            The result dict of the selected operation, or an error dict when
            the configured operation is not recognised.
        """
        arguments = arguments or {}
        fields = self.tool_config.get("fields") or {}
        operation = fields.get("operation", "get_isoforms")
        if operation == "get_isoforms":
            return self._get_isoforms(arguments)
        if operation == "get_principal":
            return self._get_principal(arguments)
        if operation == "get_functional_annotations":
            return self._get_functional_annotations(arguments)
        return {"status": "error", "error": f"Unknown operation: {operation}"}

    @staticmethod
    def _clean_arg(value: Any, default: str = "") -> str:
        """Return *value* as a stripped string, or *default* if missing/blank.

        Non-string values are converted with ``str()`` so that a caller
        passing e.g. a number does not trigger an ``AttributeError``.
        """
        if value is None:
            return default
        text = str(value).strip()
        return text or default

    @classmethod
    def _split_methods(cls, raw: Any) -> list:
        """Normalise a ``methods`` argument into a list of non-empty names.

        Accepts either a comma-separated string or a list/tuple/set of names.
        Surrounding whitespace is removed and empty entries are dropped.
        """
        if isinstance(raw, (list, tuple, set)):
            tokens = [cls._clean_arg(item) for item in raw]
        else:
            tokens = [part.strip() for part in cls._clean_arg(raw).split(",")]
        return [token for token in tokens if token]

    def _build_url(self, gene_id: str, species: str = "homo_sapiens") -> str:
        """Build the exporter URL for *gene_id* in *species*.

        Both values are percent-encoded because they come from the caller and
        are placed into the URL path; an unescaped ``/``, ``?`` or ``#`` would
        otherwise change which endpoint is requested.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import quote

        safe_species = quote(str(species), safe="")
        safe_gene = quote(str(gene_id), safe="")
        return f"{self.BASE_URL}/exporter/id/{safe_species}/{safe_gene}"

    def _fetch(
        self,
        gene_id: str,
        species: str = "homo_sapiens",
        methods: str | None = None,
    ) -> Dict[str, Any]:
        """Request annotation records for a gene and return them as a list.

        Args:
            gene_id: Gene identifier placed in the request path.
            species: Species name placed in the request path.
            methods: Optional comma-separated method names, sent as the
                ``methods`` query parameter when non-empty.

        Returns:
            ``{"status": "success", "data": <list>}`` on success; otherwise an
            error dict describing the request failure, the non-200 status,
            the JSON parse failure, the empty payload or the unexpected
            payload shape.
        """
        url = self._build_url(gene_id, species)
        params: Dict[str, str] = {"format": "json", "sc": "ensembl"}
        if methods:
            params["methods"] = methods

        try:
            resp = request_with_retry(requests, "GET", url, params=params, timeout=30)
        except Exception as exc:
            return {"status": "error", "error": f"Request failed: {exc}"}

        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"HTTP {resp.status_code}",
                "detail": resp.text[:500],
            }

        try:
            data = resp.json()
        except Exception:
            return {"status": "error", "error": "Failed to parse JSON response"}

        if not data:
            return {
                "status": "error",
                "error": (
                    f"No APPRIS data found for {gene_id}. "
                    "Ensure you use an Ensembl gene ID (ENSG...) for the correct species."
                ),
            }

        # The operations below iterate over a list of record dicts; anything
        # else would fail later with an AttributeError, so reject it here.
        if not isinstance(data, list):
            return {
                "status": "error",
                "error": "Unexpected APPRIS response format (expected a list of records)",
            }
        return {"status": "success", "data": data}

    @staticmethod
    def _filter_principal_isoforms(records: list) -> list:
        """Return the dict records whose ``type`` is ``"principal_isoform"``.

        Entries that are not dicts are skipped.
        """
        return [
            r
            for r in records
            if isinstance(r, dict) and r.get("type") == "principal_isoform"
        ]

    def _get_isoforms(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Return all ``principal_isoform`` records for a gene.

        Requires ``gene_id``; ``species`` defaults to ``"homo_sapiens"``.
        The request is restricted to the ``appris`` method.
        """
        gene_id = self._clean_arg(arguments.get("gene_id"))
        if not gene_id:
            return {"status": "error", "error": "gene_id is required"}
        species = self._clean_arg(arguments.get("species"), "homo_sapiens")

        result = self._fetch(gene_id, species, methods="appris")
        if result.get("status") == "error":
            return result

        isoforms = self._filter_principal_isoforms(result["data"])
        return {
            "status": "success",
            "data": isoforms,
            "metadata": {
                "gene_id": gene_id,
                "species": species,
                "total_isoforms": len(isoforms),
            },
        }

    def _get_principal(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Return a single best ``principal_isoform`` record for a gene.

        Selection order:
          1. the first record whose ``reliability`` is ``PRINCIPAL:1``, then
             ``PRINCIPAL:2`` ... up to ``PRINCIPAL:5``;
          2. otherwise the first record whose ``annotation`` contains
             ``"Principal"``;
          3. otherwise the first record in the list.

        An error dict is returned when the gene has no such records at all.
        """
        gene_id = self._clean_arg(arguments.get("gene_id"))
        if not gene_id:
            return {"status": "error", "error": "gene_id is required"}
        species = self._clean_arg(arguments.get("species"), "homo_sapiens")

        result = self._fetch(gene_id, species, methods="appris")
        if result.get("status") == "error":
            return result

        isoforms = self._filter_principal_isoforms(result["data"])
        priority_order = [f"PRINCIPAL:{i}" for i in range(1, 6)]
        principal = next(
            (
                iso
                for label in priority_order
                for iso in isoforms
                if iso.get("reliability") == label
            ),
            None,
        )
        if principal is None and isoforms:
            principal = next(
                (
                    iso
                    for iso in isoforms
                    if "Principal" in (iso.get("annotation") or "")
                ),
                isoforms[0],
            )
        if principal is None:
            return {
                "status": "error",
                "error": f"No principal isoform found for {gene_id}",
            }
        return {
            "status": "success",
            "data": principal,
            "metadata": {
                "gene_id": gene_id,
                "species": species,
                "all_isoform_count": len(isoforms),
            },
        }

    def _get_functional_annotations(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Return annotation records for a gene, grouped by record ``type``.

        Args (read from *arguments*):
            gene_id: Required gene identifier.
            species: Optional, defaults to ``"homo_sapiens"``.
            methods: Optional comma-separated string (or list) of names from
                ``VALID_METHODS``; when omitted no ``methods`` filter is sent.
            transcript_id: Optional; when given, only records whose
                ``transcript_id`` equals it are kept.

        Returns:
            A success dict whose ``data`` maps each record type to its list
            of records, or an error dict for a missing gene_id, invalid method
            names or a failed fetch.
        """
        gene_id = self._clean_arg(arguments.get("gene_id"))
        if not gene_id:
            return {"status": "error", "error": "gene_id is required"}
        species = self._clean_arg(arguments.get("species"), "homo_sapiens")

        # Validate and send the same normalised names, so that input such as
        # "firestar, spade" neither fails validation nor reaches the API with
        # stray whitespace or empty entries.
        method_names = self._split_methods(arguments.get("methods"))
        invalid = [m for m in method_names if m not in self.VALID_METHODS]
        if invalid:
            return {
                "status": "error",
                "error": f"Invalid methods: {invalid}. Valid: {sorted(self.VALID_METHODS)}",
            }
        methods = ",".join(method_names)

        result = self._fetch(gene_id, species, methods=methods or None)
        if result.get("status") == "error":
            return result

        transcript_id = self._clean_arg(arguments.get("transcript_id"))

        # Group by annotation type, applying the transcript filter on the fly.
        by_type: Dict[str, list] = {}
        for record in result["data"]:
            if not isinstance(record, dict):
                continue
            if transcript_id and record.get("transcript_id") != transcript_id:
                continue
            # A missing or null type is grouped under "unknown" so that the
            # keys stay sortable strings.
            record_type = record.get("type") or "unknown"
            by_type.setdefault(record_type, []).append(record)

        return {
            "status": "success",
            "data": by_type,
            "metadata": {
                "gene_id": gene_id,
                "species": species,
                "annotation_types": sorted(by_type.keys()),
                "total_records": sum(len(v) for v in by_type.values()),
            },
        }