Source code for tooluniverse.pombase_tool

# pombase_tool.py
"""
PomBase REST API tool for ToolUniverse.

PomBase is the comprehensive database for the fission yeast
Schizosaccharomyces pombe. It provides curated gene information,
protein domains, phenotypes, GO annotations, and interactions.
Complements SGD (budding yeast S. cerevisiae).

API: https://www.pombase.org/api/v1/dataset/latest/data
No authentication required. Free for academic/research use.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

POMBASE_BASE_URL = "https://www.pombase.org/api/v1/dataset/latest/data"


[docs] @register_tool("PomBaseTool") class PomBaseTool(BaseTool): """ Tool for querying PomBase, the S. pombe genome database. Provides detailed gene information for fission yeast including protein domains, GO annotations, phenotypes, and more. No authentication required. """
[docs] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) self.timeout = tool_config.get("timeout", 30) self.endpoint_type = tool_config.get("fields", {}).get( "endpoint_type", "gene_detail" )
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Execute the PomBase API call.""" try: return self._dispatch(arguments) except requests.exceptions.Timeout: return { "error": f"PomBase API request timed out after {self.timeout} seconds" } except requests.exceptions.ConnectionError: return { "error": "Failed to connect to PomBase API. Check network connectivity." } except requests.exceptions.HTTPError as e: return {"error": f"PomBase API HTTP error: {e.response.status_code}"} except Exception as e: return {"error": f"Unexpected error querying PomBase: {str(e)}"}
[docs] def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Route to appropriate endpoint based on config.""" if self.endpoint_type == "gene_detail": return self._gene_detail(arguments) elif self.endpoint_type == "gene_summary_search": return self._gene_summary_search(arguments) elif self.endpoint_type == "gene_phenotypes": return self._gene_phenotypes(arguments) else: return {"error": f"Unknown endpoint_type: {self.endpoint_type}"}
[docs] def _gene_detail(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get detailed gene information from PomBase by systematic ID.""" gene_id = arguments.get("gene_id", "") if not gene_id: return { "error": "gene_id parameter is required (e.g., 'SPBC11B10.09' for cdc2)" } url = f"{POMBASE_BASE_URL}/gene/{gene_id}" response = requests.get( url, headers={"Accept": "application/json"}, timeout=self.timeout, ) response.raise_for_status() raw = response.json() # Extract key fields interpro = [] for match in raw.get("interpro_matches", [])[:15]: interpro.append( { "id": match.get("id"), "db": match.get("dbname"), "name": match.get("name"), "description": match.get("description") or match.get("interpro_description"), "interpro_id": match.get("interpro_id"), "start": match.get("match_start"), "end": match.get("match_end"), } ) # Extract TM domains if present tm_domains = raw.get("tm_domain_coords", []) result = { "systematic_id": raw.get("uniquename"), "gene_name": raw.get("name"), "product": raw.get("product"), "taxon_id": raw.get("taxonid"), "uniprot_id": raw.get("uniprot_identifier"), "deletion_viability": raw.get("deletion_viability"), "biogrid_id": raw.get("biogrid_interactor_id"), "interpro_domains": interpro, "tm_domains": tm_domains[:10] if tm_domains else [], "characterisation_status": raw.get("characterisation_status"), } return { "data": result, "metadata": { "source": "PomBase", "query": gene_id, "endpoint": "gene_detail", }, }
[docs] def _gene_phenotypes(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get phenotype information for a PomBase gene.""" gene_id = arguments.get("gene_id", "") if not gene_id: return { "error": "gene_id parameter is required (e.g., 'SPBC11B10.09' for cdc2)" } # Get full gene data url = f"{POMBASE_BASE_URL}/gene/{gene_id}" response = requests.get( url, headers={"Accept": "application/json"}, timeout=self.timeout, ) response.raise_for_status() raw = response.json() # Extract phenotype annotations from cv_annotations phenotypes = [] cv_annotations = raw.get("cv_annotations", {}) terms_lookup = raw.get("terms_by_termid", {}) # Phenotype annotations are under single_locus_phenotype / multi_locus_phenotype for cv_name, annotations in cv_annotations.items(): if "phenotype" in cv_name.lower(): for ann in annotations[:30]: term_id = ann.get("term", "") # Look up term name from the terms_by_termid dict term_info = terms_lookup.get(term_id, {}) term_name = ( term_info.get("name") if isinstance(term_info, dict) else None ) phenotypes.append( { "term_id": term_id, "term_name": term_name, "cv_name": cv_name, "is_not": ann.get("is_not", False), } ) # Also check physical_interactions and genetic_interactions counts gene_name = raw.get("name", gene_id) deletion_viability = raw.get("deletion_viability") result = { "systematic_id": raw.get("uniquename"), "gene_name": gene_name, "deletion_viability": deletion_viability, "phenotype_count": len(phenotypes), "phenotypes": phenotypes[:50], } return { "data": result, "metadata": { "source": "PomBase", "query": gene_id, "endpoint": "gene_phenotypes", }, }