# Source code for tooluniverse.scxa_tool

# scxa_tool.py
"""
Single Cell Expression Atlas (SCXA) REST API tool for ToolUniverse.

EBI's Single Cell Expression Atlas provides curated single-cell RNA-seq
experiments with cell type annotations, marker genes, and expression data
across 380+ experiments from multiple species.

API: https://www.ebi.ac.uk/gxa/sc/json/
No authentication required.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

# Root URL of the SCXA JSON REST API; endpoint paths such as "/experiments"
# and "/search" are appended to it when building request URLs.
SCXA_BASE_URL = "https://www.ebi.ac.uk/gxa/sc/json"


@register_tool("SCExpressionAtlasTool")
class SCExpressionAtlasTool(BaseTool):
    """
    Tool for querying EBI Single Cell Expression Atlas.

    Supports listing single-cell RNA-seq experiments with metadata
    (species, cell counts, technology, factors) and searching for
    experiments where a gene is expressed at single-cell resolution.

    The operation performed by ``run`` is fixed per tool instance and is
    read from ``tool_config["fields"]["operation"]``; supported values are
    ``"list_experiments"`` (the default) and ``"search_gene"``.

    No authentication required.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Initialise the tool from its configuration.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default
                60) is used for every HTTP request; ``fields.operation``
                selects the operation (default ``"list_experiments"``).
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 60)
        self.operation = tool_config.get("fields", {}).get(
            "operation", "list_experiments"
        )

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the SCXA API call.

        Dispatches to the handler for ``self.operation`` and converts any
        exception into an error payload, so this method never raises.

        Args:
            arguments: Operation-specific arguments (see the handlers).

        Returns:
            A dict with ``status`` set to ``"success"`` (plus ``data`` and
            ``metadata``) or ``"error"`` (plus an ``error`` message).
        """
        try:
            if self.operation == "list_experiments":
                return self._list_experiments(arguments)
            elif self.operation == "search_gene":
                return self._search_gene(arguments)
            return {
                "status": "error",
                "error": f"Unknown operation: {self.operation}",
            }
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"SCXA API request timed out after {self.timeout}s",
            }
        except requests.exceptions.ConnectionError:
            return {
                "status": "error",
                "error": "Failed to connect to SCXA API. Check network.",
            }
        except Exception as e:
            # Top-level boundary: HTTP errors, malformed JSON and anything
            # unexpected are reported to the caller rather than raised.
            return {
                "status": "error",
                "error": f"Error querying SCXA: {str(e)}",
            }

    @staticmethod
    def _clamp_limit(value: Any, default: int = 20, maximum: int = 100) -> int:
        """
        Coerce a caller-supplied limit to an int within ``0..maximum``.

        ``None`` and values that cannot be converted to an int fall back to
        ``default``. Negative values become 0 so they are never interpreted
        as a "drop the last N items" slice bound.
        """
        if value is None:
            return default
        try:
            limit = int(value)
        except (TypeError, ValueError, OverflowError):
            return default
        return max(0, min(limit, maximum))

    @staticmethod
    def _is_ensembl_gene_id(gene: str) -> bool:
        """
        Return True when ``gene`` should be sent as an Ensembl gene ID.

        Anything starting with ``ENSG`` or ``ENSMUS`` is accepted, and in
        addition any identifier shaped like an Ensembl stable gene ID:
        ``ENS`` + optional upper-case species letters + ``G`` + 11 digits
        (for example ``ENSDARG00000012345``). A trailing ``.version``
        suffix is ignored for the shape check.
        """
        if gene.startswith(("ENSG", "ENSMUS")):
            return True
        core = gene.partition(".")[0]
        prefix, digits = core[:-11], core[-11:]
        return (
            core.isascii()
            and len(digits) == 11
            and digits.isdigit()
            and prefix.isalpha()
            and prefix.isupper()
            and prefix.startswith("ENS")
            and prefix.endswith("G")
        )

    def _list_experiments(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        List all single-cell RNA-seq experiments with optional filtering.

        The full experiment list is fetched and then filtered client-side.

        Args:
            arguments: Optional keys:
                ``species`` - case-insensitive substring matched against
                each experiment's species;
                ``keyword`` - case-insensitive substring matched against
                the description or the experimental factors;
                ``limit`` - maximum number of results (default 20, capped
                at 100, negative values treated as 0).

        Returns:
            A success payload whose ``data`` is a list of experiment
            summaries and whose ``metadata`` reports the number of matches
            before truncation (``total_matching``) and after (``returned``).

        Raises:
            requests.exceptions.RequestException: on network/HTTP failure
                (handled by ``run``).
        """
        url = f"{SCXA_BASE_URL}/experiments"
        response = requests.get(url, timeout=self.timeout)
        response.raise_for_status()
        raw = response.json()

        # ``or []`` also covers an explicit JSON null.
        experiments = raw.get("experiments") or []

        species_filter = arguments.get("species")
        if species_filter:
            species_lower = str(species_filter).lower()
            experiments = [
                e
                for e in experiments
                if species_lower in (e.get("species") or "").lower()
            ]

        keyword = arguments.get("keyword")
        if keyword:
            kw_lower = str(keyword).lower()

            def _matches(exp: Dict[str, Any]) -> bool:
                # Null fields are treated as empty rather than crashing.
                description = (exp.get("experimentDescription") or "").lower()
                factors = " ".join(
                    str(f) for f in (exp.get("experimentalFactors") or [])
                ).lower()
                return kw_lower in description or kw_lower in factors

            experiments = [e for e in experiments if _matches(e)]

        total = len(experiments)
        limit = self._clamp_limit(arguments.get("limit"))
        experiments = experiments[:limit]

        results = [
            {
                "accession": exp.get("experimentAccession"),
                "description": exp.get("experimentDescription"),
                "species": exp.get("species"),
                "technology": exp.get("technologyType"),
                "num_cells": exp.get("numberOfAssays"),
                "experimental_factors": exp.get("experimentalFactors"),
                "experiment_type": exp.get("rawExperimentType"),
                "last_updated": exp.get("lastUpdate"),
            }
            for exp in experiments
        ]

        return {
            "status": "success",
            "data": results,
            "metadata": {
                "total_matching": total,
                "returned": len(results),
                "source": "EBI Single Cell Expression Atlas",
            },
        }

    def _search_gene(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search for SC experiments where a gene is expressed.

        Args:
            arguments: ``gene`` (required) - a gene symbol such as ``TP53``
                or an Ensembl gene ID such as ``ENSG00000141510``;
                ``species`` (optional) - forwarded to the API unchanged.

        Returns:
            An error payload when ``gene`` is missing or blank; otherwise a
            success payload whose ``data`` lists the matching experiments
            and whose ``metadata`` echoes the query and the gene ID the API
            reported as matching.

        Raises:
            requests.exceptions.RequestException: on network/HTTP failure
                (handled by ``run``).
        """
        # Surrounding whitespace would otherwise be sent to the API and
        # defeat the Ensembl-prefix detection below.
        gene = str(arguments.get("gene") or "").strip()
        if not gene:
            return {
                "status": "error",
                "error": "gene parameter is required (symbol like TP53 or Ensembl ID like ENSG00000141510)",
            }

        params = {}
        if self._is_ensembl_gene_id(gene):
            params["ensgene"] = gene
        else:
            params["symbol"] = gene

        species = arguments.get("species")
        if species:
            params["species"] = species

        url = f"{SCXA_BASE_URL}/search"
        response = requests.get(url, params=params, timeout=self.timeout)
        response.raise_for_status()
        raw = response.json()

        matching_gene_id = raw.get("matchingGeneId", "")

        results = []
        for r in raw.get("results") or []:
            # Each hit wraps the experiment record under "element".
            elem = r.get("element") or {}
            results.append(
                {
                    "accession": elem.get("experimentAccession"),
                    "description": elem.get("experimentDescription"),
                    "species": elem.get("species"),
                    "num_cells": elem.get("numberOfAssays"),
                    "technology": elem.get("technologyType"),
                    "experimental_factors": elem.get("experimentalFactors"),
                    "pubmed_ids": elem.get("pubMedIds"),
                    "dois": elem.get("dois"),
                }
            )

        return {
            "status": "success",
            "data": results,
            "metadata": {
                "gene_query": gene,
                "matching_gene_id": matching_gene_id,
                "total_experiments": len(results),
                "source": "EBI Single Cell Expression Atlas",
            },
        }