Source code for tooluniverse.cbioportal_tool

import requests
from typing import Any, Dict
from .base_tool import BaseTool
from .tool_registry import register_tool


[docs] @register_tool("CBioPortalRESTTool") class CBioPortalRESTTool(BaseTool):
[docs] def __init__(self, tool_config: Dict): super().__init__(tool_config) self.base_url = "https://www.cbioportal.org/api" self.session = requests.Session() self.session.headers.update( { "Accept": "application/json", "Content-Type": "application/json", "User-Agent": "ToolUniverse/1.0", } ) self.timeout = 30
[docs] def _build_url(self, args: Dict[str, Any]) -> str: url = self.tool_config["fields"]["endpoint"] for k, v in args.items(): url = url.replace(f"{{{k}}}", str(v)) return url
[docs] def _get_gene_entrez_ids(self, gene_symbols: str) -> list[int]: """Convert gene symbols to Entrez IDs""" genes = [g.strip() for g in gene_symbols.split(",")] entrez_ids = [] for gene in genes: response = self.session.get( f"{self.base_url}/genes?keyword={gene}", timeout=self.timeout ) if response.status_code == 200: gene_data = response.json() if gene_data: entrez_ids.append(gene_data[0].get("entrezGeneId")) return entrez_ids
[docs] def _get_mutation_profile_id(self, study_id: str) -> str: """Get the mutation molecular profile ID for a study""" response = self.session.get( f"{self.base_url}/studies/{study_id}/molecular-profiles", timeout=self.timeout, ) if response.status_code == 200: profiles = response.json() for profile in profiles: alt_type = profile.get("molecularAlterationType") if alt_type == "MUTATION_EXTENDED": return profile.get("molecularProfileId") # Fallback to common naming pattern return f"{study_id}_mutations"
_ALTERATION_LABELS = { -2: "deep_deletion", -1: "shallow_loss", 0: "neutral", 1: "gain", 2: "amplification", }
[docs] def _get_cna_profile_id(self, study_id: str) -> str: """Get the discrete (GISTIC) copy-number molecular profile ID for a study.""" response = self.session.get( f"{self.base_url}/studies/{study_id}/molecular-profiles", timeout=self.timeout, ) if response.status_code == 200: for profile in response.json(): if ( profile.get("molecularAlterationType") == "COPY_NUMBER_ALTERATION" and profile.get("datatype") == "DISCRETE" ): return profile.get("molecularProfileId") # Fallback to the common GISTIC naming pattern. return f"{study_id}_gistic"
[docs] def _fetch_discrete_cna(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Fetch discrete copy-number alteration (CNA) calls for a gene in a study. Returns per-sample alteration values (-2,-1,0,1,2 = deep-deletion, shallow-loss, neutral, gain, amplification) from GISTIC profiles, plus a count breakdown by alteration type. """ study_id = arguments.get("study_id") if not study_id: return {"status": "error", "error": "study_id parameter is required"} gene_list = arguments.get("gene_list") or arguments.get("gene") if not gene_list: return {"status": "error", "error": "gene_list parameter is required"} event_type = (arguments.get("alteration_type") or "ALL").upper() valid_events = {"AMP", "GAIN", "DIPLOID", "HETLOSS", "HOMDEL", "ALL"} if event_type not in valid_events: event_type = "ALL" # Resolve molecular profile (allow explicit override). profile_id = arguments.get("molecular_profile_id") or self._get_cna_profile_id( study_id ) # Resolve gene symbols -> Entrez IDs. entrez_ids = self._get_gene_entrez_ids(gene_list) entrez_ids = [e for e in entrez_ids if e is not None] if not entrez_ids: return { "status": "error", "error": f"Could not find Entrez IDs for genes: {gene_list}", } sample_list_id = arguments.get("sample_list_id") or f"{study_id}_all" url = ( f"{self.base_url}/molecular-profiles/{profile_id}" f"/discrete-copy-number/fetch?projection=SUMMARY" ) if event_type != "ALL": url += f"&discreteCopyNumberEventType={event_type}" payload = {"entrezGeneIds": entrez_ids, "sampleListId": sample_list_id} response = self.session.post(url, json=payload, timeout=self.timeout) response.raise_for_status() data = response.json() if not isinstance(data, list): data = [] # Tally alteration values into human-readable categories. counts: Dict[str, int] = {} for rec in data: label = self._ALTERATION_LABELS.get(rec.get("alteration"), "unknown") counts[label] = counts.get(label, 0) + 1 return { "status": "success", "data": data, "url": url, "count": len(data), "molecular_profile_id": profile_id, "entrez_gene_ids": entrez_ids, "alteration_type": event_type, "alteration_counts": counts, }
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: try: if "query" in arguments and "keyword" not in arguments: arguments = {**arguments, "keyword": arguments["query"]} if ( "get_genes" in self.tool_config.get("name", "") and "keyword" not in arguments ): return { "status": "error", "error": "keyword or query parameter is required", } method = self.tool_config["fields"].get("method", "GET") url = self._build_url(arguments) # Special handling for discrete copy-number alteration (CNA) queries. if "cBioPortal_get_copy_number_alterations" in self.tool_config.get( "name", "" ): return self._fetch_discrete_cna(arguments) # Special handling for mutation queries with new API if "cBioPortal_get_mutations" in self.tool_config.get("name", ""): study_id = arguments.get("study_id") gene_list = arguments.get("gene_list") sample_list_id = arguments.get("sample_list_id") # Get molecular profile ID profile_id = self._get_mutation_profile_id(study_id) # Get gene Entrez IDs entrez_ids = self._get_gene_entrez_ids(gene_list) if not entrez_ids: error_msg = f"Could not find Entrez IDs for genes: {gene_list}" return {"status": "error", "error": error_msg} # Use the new API endpoint url = f"{self.base_url}/molecular-profiles/{profile_id}/mutations/fetch" # Build payload payload = {"entrezGeneIds": entrez_ids} # Add sample filter if provided, otherwise use all samples if sample_list_id: payload["sampleListId"] = sample_list_id else: payload["sampleListId"] = f"{study_id}_all" response = self.session.post(url, json=payload, timeout=self.timeout) response.raise_for_status() data = response.json() return { "status": "success", "data": data, "url": url, "count": len(data) if isinstance(data, list) else 1, "molecular_profile_id": profile_id, "entrez_gene_ids": entrez_ids, } # Handle regular GET or POST requests if method == "POST": payload = self.tool_config["fields"].get("payload", {}) # Replace placeholders in payload for k, v in arguments.items(): if isinstance(payload, dict): for pk, pv in payload.items(): if isinstance(pv, str): payload[pk] = pv.replace(f"{{{k}}}", str(v)) response = self.session.post(url, json=payload, timeout=self.timeout) else: response = self.session.get(url, timeout=self.timeout) response.raise_for_status() data = response.json() return { "status": "success", "data": data, "url": url, "count": len(data) if isinstance(data, list) else 1, } except Exception as e: return { "status": "error", "error": f"cBioPortal API error: {str(e)}", "url": url if "url" in locals() else "unknown", }