# Source code for tooluniverse.expression_atlas_tool

# expression_atlas_tool.py
"""
EBI Expression Atlas API tool for ToolUniverse.

Expression Atlas provides gene expression data across species and biological conditions,
including baseline (normal tissue/cell type) and differential (disease vs. normal) expression.

Data includes:
- Baseline tissue/cell-type expression from RNA-seq and proteomics
- Differential expression between conditions (disease, treatment, etc.)
- Experiment metadata and design

API Base URL: https://www.ebi.ac.uk/gxa/
No authentication required.
"""

from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

# Base URLs
GXA_BASE = "https://www.ebi.ac.uk/gxa"
EBI_SEARCH_BASE = "https://www.ebi.ac.uk/ebisearch/ws/rest"


@register_tool("ExpressionAtlasTool")
class ExpressionAtlasTool(BaseTool):
    """
    Tool for querying EBI Expression Atlas gene expression data.

    Provides access to:
    - Baseline gene expression across tissues and cell types
    - Differential expression in disease and treatment contexts
    - Experiment search and metadata
    - Both bulk RNA-seq and single-cell data

    No authentication required.

    Every operation returns a dict with ``"status"`` set to ``"success"`` or
    ``"error"``; network and parsing failures are reported in the ``"error"``
    field rather than raised.
    """

    # Maximum number of experiments included in a single response.
    MAX_RESULTS = 50
    # Page size requested from EBI Search when looking up a gene.
    GENE_SEARCH_SIZE = 100

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Configure the tool from its ToolUniverse config.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default 30)
                is applied to every HTTP request; ``fields.operation`` selects
                the operation executed by :meth:`run` (default
                ``"get_baseline_expression"``).
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        self.operation = tool_config.get("fields", {}).get(
            "operation", "get_baseline_expression"
        )

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the configured Expression Atlas operation.

        Args:
            arguments: Operation arguments (``gene``, ``condition``,
                ``species``, ``accession`` depending on the operation).
                ``None`` is treated as an empty dict.

        Returns:
            The operation's result dict, or an error dict when the configured
            operation name is not recognised.
        """
        handlers = {
            "get_baseline_expression": self._get_baseline_expression,
            "search_differential_experiments": self._search_differential_experiments,
            "search_experiments": self._search_experiments,
            "get_experiment": self._get_experiment,
        }
        handler = handlers.get(self.operation)
        if handler is None:
            return {
                "status": "error",
                "error": f"Unknown operation: {self.operation}",
            }
        return handler(arguments or {})

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _text_arg(arguments: Dict[str, Any], key: str, default: str = "") -> Any:
        """Return ``arguments[key]``, using ``default`` when missing or None."""
        value = arguments.get(key)
        return default if value is None else value

    @staticmethod
    def _lower(record: Dict[str, Any], key: str) -> str:
        """Return ``record[key]`` lower-cased, treating missing/null as ''."""
        return (record.get(key) or "").lower()

    def _fetch_catalog(self) -> list:
        """
        Download the full experiment catalog from Expression Atlas.

        Returns:
            The list under the ``experiments`` key of the JSON response
            (empty when the key is absent or null).

        Raises:
            requests.exceptions.RequestException: On timeout, connection
                failure or a non-2xx status.
        """
        response = requests.get(f"{GXA_BASE}/json/experiments", timeout=self.timeout)
        response.raise_for_status()
        return response.json().get("experiments") or []

    def _fetch_gene_experiment_ids(self, gene: str, fields: str = "") -> set:
        """
        Ask EBI Search which Atlas experiments match ``gene``.

        This lookup is best-effort: a non-200 response yields an empty set
        instead of an error, so the caller can still return catalog results.

        Args:
            gene: Free-text query sent to the ``atlas-experiments`` domain.
            fields: Optional value for the ``fields`` query parameter.

        Returns:
            Set of entry ids (entries without an id are skipped).
        """
        params = {
            "query": gene,
            "size": self.GENE_SEARCH_SIZE,
            "format": "json",
        }
        if fields:
            params["fields"] = fields
        response = requests.get(
            f"{EBI_SEARCH_BASE}/atlas-experiments",
            params=params,
            timeout=self.timeout,
        )
        if response.status_code != 200:
            return set()
        entries = response.json().get("entries") or []
        return {entry.get("id") for entry in entries if entry.get("id")}

    def _filter_by_condition(self, experiments: list, condition: str) -> list:
        """Keep experiments whose description contains ``condition`` (case-insensitive)."""
        needle = condition.lower()
        return [
            exp
            for exp in experiments
            if needle in self._lower(exp, "experimentDescription")
        ]

    @staticmethod
    def _summarize(exp: Dict[str, Any], gene_mentioned: Any) -> Dict[str, Any]:
        """Build the summary record shared by all list-style operations."""
        return {
            "experiment_accession": exp.get("experimentAccession", ""),
            "experiment_type": exp.get("rawExperimentType"),
            "experiment_description": exp.get("experimentDescription"),
            "species": exp.get("species"),
            "num_assays": exp.get("numberOfAssays"),
            "gene_mentioned": gene_mentioned,
        }

    @staticmethod
    def _sort_gene_first(records: list) -> None:
        """Sort in place: gene-mentioned records first, then by assay count (descending)."""
        records.sort(
            key=lambda rec: (
                not rec.get("gene_mentioned", False),
                -(rec.get("num_assays") or 0),
            )
        )

    def _error_response(self, exc: Exception) -> Dict[str, Any]:
        """
        Translate an exception into the tool's error dict.

        Timeout and HTTPError are checked before the generic
        RequestException because both are subclasses of it.
        """
        if isinstance(exc, requests.exceptions.Timeout):
            message = f"Expression Atlas API timeout after {self.timeout}s"
        elif isinstance(exc, requests.exceptions.HTTPError):
            sc = exc.response.status_code if exc.response is not None else "unknown"
            message = f"Expression Atlas API HTTP error: {sc}"
        elif isinstance(exc, requests.exceptions.RequestException):
            message = f"Expression Atlas API request failed: {str(exc)}"
        else:
            message = f"Unexpected error: {str(exc)}"
        return {"status": "error", "error": message}

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------

    def _get_baseline_expression(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get baseline expression experiments for a gene.

        Filters the Expression Atlas experiment catalog for baseline
        experiments in the requested species, then flags those whose
        accession is returned by an EBI Search query for the gene.

        Args:
            arguments: ``gene`` (required) and ``species`` (default
                ``"homo sapiens"``, matched case-insensitively).

        Returns:
            On success, ``data`` holds up to ``MAX_RESULTS`` experiments
            (gene-mentioned first, then by assay count), the total number of
            baseline experiments for the species and how many of them
            mention the gene. On failure, an error dict.
        """
        gene = self._text_arg(arguments, "gene")
        species = self._text_arg(arguments, "species", "homo sapiens")

        if not gene:
            return {
                "status": "error",
                "error": "gene parameter is required",
            }

        try:
            # Step 1: catalog, restricted to baseline experiments in the species.
            species_lower = species.lower()
            baseline_exps = [
                exp
                for exp in self._fetch_catalog()
                if "BASELINE" in (exp.get("rawExperimentType") or "")
                and self._lower(exp, "species") == species_lower
            ]

            # Step 2: experiments EBI Search associates with the gene.
            gene_experiment_ids = self._fetch_gene_experiment_ids(
                gene, fields="description"
            )

            results = []
            for exp in baseline_exps:
                record = self._summarize(
                    exp, exp.get("experimentAccession", "") in gene_experiment_ids
                )
                record["last_update"] = exp.get("lastUpdate")
                results.append(record)

            self._sort_gene_first(results)

            return {
                "status": "success",
                "data": {
                    "gene": gene,
                    "species": species,
                    "baseline_experiments": results[: self.MAX_RESULTS],
                    "total_baseline": len(baseline_exps),
                    "gene_specific_count": sum(
                        1 for rec in results if rec["gene_mentioned"]
                    ),
                },
                "source": "EBI Expression Atlas - Baseline Expression",
            }
        except Exception as exc:
            return self._error_response(exc)

    def _search_differential_experiments(
        self, arguments: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Search for differential expression experiments by gene and/or condition.

        Args:
            arguments: ``gene`` and/or ``condition`` (at least one required)
                and ``species`` (default ``"homo sapiens"``; an empty string
                disables the species filter).

        Returns:
            On success, ``data`` holds up to ``MAX_RESULTS`` experiments and
            the total match count. ``gene_mentioned`` is ``None`` on every
            record when no gene was supplied. On failure, an error dict.
        """
        gene = self._text_arg(arguments, "gene")
        condition = self._text_arg(arguments, "condition")
        species = self._text_arg(arguments, "species", "homo sapiens")

        if not gene and not condition:
            return {
                "status": "error",
                "error": "Either gene or condition parameter is required",
            }

        try:
            catalog = self._fetch_catalog()

            species_lower = species.lower()
            diff_exps = [
                exp
                for exp in catalog
                if "DIFFERENTIAL" in (exp.get("rawExperimentType") or "")
                and (not species or self._lower(exp, "species") == species_lower)
            ]

            if condition:
                diff_exps = self._filter_by_condition(diff_exps, condition)

            # Cross-reference with EBI Search only when a gene was given.
            gene_exp_ids = self._fetch_gene_experiment_ids(gene) if gene else set()

            experiments = []
            for exp in diff_exps:
                acc = exp.get("experimentAccession", "")
                record = self._summarize(exp, acc in gene_exp_ids if gene else None)
                record["factors"] = exp.get("experimentalFactors", [])
                experiments.append(record)

            if gene:
                self._sort_gene_first(experiments)

            return {
                "status": "success",
                "data": {
                    "gene": gene,
                    "condition": condition,
                    "species": species,
                    "experiments": experiments[: self.MAX_RESULTS],
                    "experiment_count": len(experiments),
                },
                "source": "EBI Expression Atlas - Differential Expression",
            }
        except Exception as exc:
            return self._error_response(exc)

    def _search_experiments(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search Expression Atlas experiments by gene and/or condition.

        Uses EBI Search for gene-specific queries and filters the full
        experiment catalog by species and condition text.

        Args:
            arguments: ``gene`` and/or ``condition`` (at least one required)
                and optional ``species`` (no species filter when empty).

        Returns:
            On success, ``data`` holds up to ``MAX_RESULTS`` experiments, the
            total match count, and ``gene_specific_count`` — the number of
            experiment ids EBI Search returned for the gene (not limited to
            the filtered results). On failure, an error dict.
        """
        gene = self._text_arg(arguments, "gene")
        condition = self._text_arg(arguments, "condition")
        species = self._text_arg(arguments, "species")

        if not gene and not condition:
            return {
                "status": "error",
                "error": "Either gene or condition parameter is required",
            }

        try:
            gene_exp_ids = self._fetch_gene_experiment_ids(gene) if gene else set()

            filtered = self._fetch_catalog()

            if species:
                sp_lower = species.lower()
                filtered = [
                    exp for exp in filtered if self._lower(exp, "species") == sp_lower
                ]

            if condition:
                filtered = self._filter_by_condition(filtered, condition)

            experiments = [
                self._summarize(
                    exp,
                    exp.get("experimentAccession", "") in gene_exp_ids
                    if gene
                    else None,
                )
                for exp in filtered
            ]

            if gene:
                self._sort_gene_first(experiments)

            return {
                "status": "success",
                "data": {
                    "gene": gene,
                    "condition": condition,
                    "species": species,
                    "experiments": experiments[: self.MAX_RESULTS],
                    "total_count": len(experiments),
                    "gene_specific_count": len(gene_exp_ids),
                },
                "source": "EBI Expression Atlas",
            }
        except Exception as exc:
            return self._error_response(exc)

    def _get_experiment(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get detailed metadata for a specific Expression Atlas experiment.

        Args:
            arguments: ``accession`` (required) — the experiment accession.

        Returns:
            On success, ``data`` holds the experiment's accession, type,
            description, species, factors, technology, contrasts, assay
            count, last update and PubMed ids. A 404 from the API is reported
            as ``status="success"`` with ``data=None`` and a ``message``.
            Other failures produce an error dict.
        """
        accession = self._text_arg(arguments, "accession")
        if not accession:
            return {"status": "error", "error": "accession parameter is required"}

        try:
            # Percent-encode the accession so characters such as '/', '?' or
            # '#' cannot alter the request path.
            url = f"{GXA_BASE}/json/experiments/{quote(str(accession), safe='')}"
            response = requests.get(url, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()

            # Some responses wrap the payload in an "experiment" object.
            experiment = data.get("experiment", data)

            return {
                "status": "success",
                "data": {
                    "accession": experiment.get("accession", accession),
                    "type": experiment.get("type"),
                    "description": experiment.get("description"),
                    "species": experiment.get("species"),
                    "factors": experiment.get("experimentalFactors", []),
                    "technology": experiment.get("technologyType", []),
                    "contrasts": experiment.get("contrasts", []),
                    "assay_count": experiment.get("numberOfAssays"),
                    "last_update": experiment.get("lastUpdate"),
                    "pubmed_ids": experiment.get("pubmedIds", []),
                },
                "source": "EBI Expression Atlas",
            }
        except requests.exceptions.HTTPError as exc:
            # An unknown accession is an empty result, not a failure.
            if exc.response is not None and exc.response.status_code == 404:
                return {
                    "status": "success",
                    "data": None,
                    "message": f"Experiment not found: {accession}",
                }
            return self._error_response(exc)
        except Exception as exc:
            return self._error_response(exc)