Source code for tooluniverse.panther_tool
# panther_tool.py
"""
PANTHER REST API tool for ToolUniverse.
PANTHER (Protein ANalysis THrough Evolutionary Relationships) classifies
proteins and their genes by function using a library of phylogenetic trees.
It provides gene functional classification, pathway analysis, and
overrepresentation (enrichment) analysis for gene lists.
API: https://pantherdb.org/services/oai/pantherdb/
No authentication required. Free for all use.
Supports 144 organisms.
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
PANTHER_BASE_URL = "https://pantherdb.org/services/oai/pantherdb"
[docs]
@register_tool("PANTHERTool")
class PANTHERTool(BaseTool):
"""
Tool for querying PANTHER gene classification and enrichment analysis.
Provides gene functional annotation, overrepresentation analysis
with GO/pathway enrichment, and ortholog mapping across 144 organisms.
No authentication required.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
super().__init__(tool_config)
self.timeout = tool_config.get("timeout", 60)
self.endpoint_type = tool_config.get("fields", {}).get(
"endpoint_type", "gene_info"
)
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Execute the PANTHER API call."""
try:
return self._dispatch(arguments)
except requests.exceptions.Timeout:
return {
"error": f"PANTHER API request timed out after {self.timeout} seconds"
}
except requests.exceptions.ConnectionError:
return {
"error": "Failed to connect to PANTHER API. Check network connectivity."
}
except requests.exceptions.HTTPError as e:
return {"error": f"PANTHER API HTTP error: {e.response.status_code}"}
except Exception as e:
return {"error": f"Unexpected error querying PANTHER: {str(e)}"}
[docs]
def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Route to appropriate endpoint based on config."""
if self.endpoint_type == "gene_info":
return self._gene_info(arguments)
elif self.endpoint_type == "enrichment":
return self._enrichment(arguments)
elif self.endpoint_type == "ortholog":
return self._ortholog(arguments)
else:
return {"error": f"Unknown endpoint_type: {self.endpoint_type}"}
[docs]
def _gene_info(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get gene classification and functional annotation from PANTHER."""
gene_id = arguments.get("gene_id", "")
organism = arguments.get("organism", 9606)
if not gene_id:
return {"error": "gene_id parameter is required (e.g., 'P04637' for TP53)"}
if organism is None:
organism = 9606
url = f"{PANTHER_BASE_URL}/geneinfo"
params = {
"geneInputList": gene_id,
"organism": organism,
"type": "ortholog",
}
response = requests.get(
url,
params=params,
headers={"Accept": "application/json"},
timeout=self.timeout,
)
response.raise_for_status()
raw = response.json()
search = raw.get("search", {})
mapped = search.get("mapped_genes", {})
gene_data = mapped.get("gene", {})
# Handle both single gene (dict) and multiple genes (list)
if isinstance(gene_data, list):
gene_data = gene_data[0] if gene_data else {}
family_id = gene_data.get("family_id", None)
sf_id = gene_data.get("sf_id", None)
# Extract annotations by category
annotations = []
ann_type_list = gene_data.get("annotation_type_list", {}).get(
"annotation_data_type", []
)
if isinstance(ann_type_list, dict):
ann_type_list = [ann_type_list]
for ann_type in ann_type_list:
category = ann_type.get("content", "")
ann_list = ann_type.get("annotation_list", {}).get("annotation", [])
if isinstance(ann_list, dict):
ann_list = [ann_list]
terms = []
for ann in ann_list:
terms.append(
{
"id": ann.get("id", ""),
"name": ann.get("name", ""),
}
)
if terms:
annotations.append(
{
"category": category,
"terms": terms,
}
)
result = {
"gene_id": gene_id,
"organism": organism,
"family_id": family_id,
"subfamily_id": sf_id,
"annotations": annotations,
}
return {
"data": result,
"metadata": {
"source": "PANTHER",
"query": gene_id,
"endpoint": "geneinfo",
},
}
[docs]
def _enrichment(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Perform gene set enrichment (overrepresentation) analysis."""
gene_list = arguments.get("gene_list", "")
organism = arguments.get("organism", 9606)
annotation_dataset = arguments.get("annotation_dataset", "GO:0008150")
if not gene_list:
return {
"error": "gene_list parameter is required (e.g., 'TP53,BRCA1,EGFR,KRAS')"
}
if organism is None:
organism = 9606
if annotation_dataset is None:
annotation_dataset = "GO:0008150"
url = f"{PANTHER_BASE_URL}/enrich/overrep"
params = {
"geneInputList": gene_list,
"organism": organism,
"annotDataSet": annotation_dataset,
"enrichmentTestType": "FISHER",
"correction": "FDR",
}
response = requests.get(
url,
params=params,
headers={"Accept": "application/json"},
timeout=self.timeout,
)
response.raise_for_status()
raw = response.json()
results = raw.get("results", {}).get("result", [])
if isinstance(results, dict):
results = [results]
# Filter to significant results (FDR < 0.05) and sort by fold enrichment
enriched = []
for r in results:
fdr = r.get("fdr", 1.0)
if fdr is None or not isinstance(fdr, (int, float)):
continue
fold = r.get("fold_enrichment", 0)
if fold is None or not isinstance(fold, (int, float)):
continue
term = r.get("term", {})
enriched.append(
{
"term_id": term.get("id", ""),
"term_label": term.get("label", ""),
"number_in_list": r.get("number_in_list", 0),
"number_in_reference": r.get("number_in_reference", 0),
"expected": r.get("expected", 0.0),
"fold_enrichment": fold,
"pvalue": r.get("pValue", 1.0),
"fdr": fdr,
"direction": r.get("plus_minus", ""),
}
)
# Sort by FDR then fold enrichment
enriched.sort(key=lambda x: (x["fdr"], -x["fold_enrichment"]))
# Return top 50 most significant
enriched_top = enriched[:50]
result = {
"gene_list": gene_list,
"organism": organism,
"annotation_dataset": annotation_dataset,
"result_count": len(enriched_top),
"enriched_terms": enriched_top,
}
return {
"data": result,
"metadata": {
"source": "PANTHER",
"query": gene_list,
"endpoint": "enrich/overrep",
},
}
[docs]
def _ortholog(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Find orthologs of a gene across species."""
gene_id = arguments.get("gene_id", "")
organism = arguments.get("organism", 9606)
target_organism = arguments.get("target_organism", 10090)
ortholog_type = arguments.get("ortholog_type", "LDO")
if not gene_id:
return {"error": "gene_id parameter is required (e.g., 'P04637' for TP53)"}
if organism is None:
organism = 9606
if target_organism is None:
target_organism = 10090
if ortholog_type is None:
ortholog_type = "LDO"
url = f"{PANTHER_BASE_URL}/ortholog/matchortho"
params = {
"geneInputList": gene_id,
"organism": organism,
"targetOrganism": target_organism,
"orthologType": ortholog_type,
}
response = requests.get(
url,
params=params,
headers={"Accept": "application/json"},
timeout=self.timeout,
)
response.raise_for_status()
raw = response.json()
search = raw.get("search", {})
mapping_data = search.get("mapping", {})
mapped = mapping_data.get("mapped", {})
mapping = None
if mapped:
# Handle single mapping (dict) or multiple (list)
if isinstance(mapped, list):
mapped = mapped[0] if mapped else {}
mapping = {
"source_gene": mapped.get("gene", ""),
"target_gene": mapped.get("target_gene", ""),
"target_gene_symbol": mapped.get("target_gene_symbol", None),
"ortholog_type": mapped.get("ortholog", ""),
"persistent_id": mapped.get("persistent_id", None),
"target_persistent_id": mapped.get("target_persistent_id", None),
}
result = {
"gene_id": gene_id,
"source_organism": organism,
"target_organism": target_organism,
"ortholog_type": ortholog_type,
"mapping": mapping,
}
return {
"data": result,
"metadata": {
"source": "PANTHER",
"query": gene_id,
"endpoint": "ortholog/matchortho",
},
}