# Source code for tooluniverse.civic_tool
"""
CIViC (Clinical Interpretation of Variants in Cancer) API tool for ToolUniverse.
CIViC is a community knowledgebase for expert-curated interpretations of variants
in cancer. It provides clinical evidence levels and interpretations.
API Documentation: https://civicdb.org/api
GraphQL Endpoint: https://civicdb.org/api/graphql
"""
import requests
from typing import Dict, Any, Optional
from .base_tool import BaseTool
from .tool_registry import register_tool
# CIViC API root, and the GraphQL endpoint that lives directly beneath it.
CIVIC_BASE_URL = "https://civicdb.org/api"
CIVIC_GRAPHQL_URL = CIVIC_BASE_URL + "/graphql"
[docs]
@register_tool("CIViCTool")
class CIViCTool(BaseTool):
"""
Tool for querying CIViC (Clinical Interpretation of Variants in Cancer).
CIViC provides:
- Expert-curated cancer variant interpretations
- Clinical evidence levels
- Drug-variant associations
- Disease-variant associations
Uses GraphQL API. No authentication required. Free for academic/research use.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
    """Configure the tool from its ToolUniverse ``tool_config`` dict.

    The GraphQL template and the argument-mapping tables are read from
    ``tool_config["fields"]``; the request timeout is read from
    ``tool_config["timeout"]``.
    """
    super().__init__(tool_config)
    field_cfg = tool_config.get("fields", {})

    # Timeout handed to requests.post by run(); 30 when not configured.
    self.timeout: int = tool_config.get("timeout", 30)

    # GraphQL document sent as-is, plus its optional operation name.
    self.query_template: str = field_cfg.get("query", "")
    self.operation_name: Optional[str] = field_cfg.get("operation_name")

    # array_wrap: argument name -> GraphQL variable name, with scalar values
    # wrapped in a one-element list, e.g. {"gene_symbol": "entrezSymbols"}
    # turns arguments["gene_symbol"] into variables["entrezSymbols"] = [value].
    self.array_wrap: Dict[str, str] = field_cfg.get("array_wrap", {})

    # param_map: argument name -> GraphQL variable name, value passed through
    # unchanged, e.g. {"therapy": "therapyName"} turns arguments["therapy"]
    # into variables["therapyName"] = value.
    self.param_map: Dict[str, str] = field_cfg.get("param_map", {})

    # variable_defaults: values for GraphQL variables the caller did not
    # supply, e.g. {"status": "ACCEPTED"}.
    self.variable_defaults: Dict[str, Any] = field_cfg.get("variable_defaults", {})
[docs]
def _build_graphql_query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Build the GraphQL request payload from the query template and arguments.

    Variables are filled in four passes, earlier passes taking part in the
    "already set" checks of later ones:

    1. Direct match: every ``$name`` found in the query template whose name
       is also an argument key.
    2. ``array_wrap``: argument renamed to a GraphQL variable and wrapped in
       a list unless it already is one (overwrites an earlier value).
    3. ``param_map``: argument renamed to a GraphQL variable, only when that
       variable is still unset.
    4. ``variable_defaults``: defaults for variables that appear in the
       query and are still unset.

    Arguments whose value is ``None`` are treated as "not supplied" in every
    pass, so an optional argument passed as ``None`` neither sends an
    explicit GraphQL null nor blocks a configured default.

    Args:
        arguments: Caller-supplied tool arguments.

    Returns:
        A dict with ``"query"`` and, when applicable, ``"operationName"`` and
        ``"variables"``.
    """
    import re

    query = self.query_template

    # Variable names referenced in the template ($limit, $status, ...),
    # de-duplicated while keeping first-seen order.
    declared = list(dict.fromkeys(re.findall(r"\$(\w+)", query)))

    variables: Dict[str, Any] = {}

    # Pass 1: argument name equals GraphQL variable name.
    for var_name in declared:
        if arguments.get(var_name) is not None:
            variables[var_name] = arguments[var_name]

    # Pass 2: rename and wrap scalars for list-typed GraphQL variables.
    for arg_name, var_name in self.array_wrap.items():
        val = arguments.get(arg_name)
        if val is not None:
            variables[var_name] = val if isinstance(val, list) else [val]

    # Pass 3: plain rename; a direct-name match from pass 1 wins.
    for arg_name, var_name in self.param_map.items():
        val = arguments.get(arg_name)
        if val is not None and var_name not in variables:
            variables[var_name] = val

    # Pass 4: defaults, only for variables the query actually references.
    for var_name, default_val in self.variable_defaults.items():
        if var_name not in variables and var_name in declared:
            variables[var_name] = default_val

    payload: Dict[str, Any] = {"query": query}
    if self.operation_name:
        payload["operationName"] = self.operation_name
    if variables:
        payload["variables"] = variables
    return payload
[docs]
def _lookup_gene_id(self, gene_name: str) -> Optional[int]:
    """Look up the CIViC gene ID for a gene symbol via GraphQL.

    The symbol is stripped and upper-cased before being sent as the single
    entry of the ``entrezSymbols`` filter.

    Args:
        gene_name: Gene symbol, e.g. ``"BRAF"``.

    Returns:
        The ``id`` of the first gene node in the response, or ``None`` when
        the input is not a non-blank string, the request fails, the response
        is not JSON, or no gene node is returned. This method never raises
        for those cases; callers treat ``None`` as "gene not found".
    """
    # Reject unusable input up front instead of failing on .upper().
    if not isinstance(gene_name, str) or not gene_name.strip():
        return None

    payload = {
        "query": "query GetGenes($entrezSymbols: [String!]) { genes(entrezSymbols: $entrezSymbols) { nodes { id name } } }",
        "variables": {"entrezSymbols": [gene_name.strip().upper()]},
    }
    try:
        resp = requests.post(
            CIVIC_GRAPHQL_URL,
            json=payload,
            timeout=10,
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json",
            },
        )
        body = resp.json()
    except (requests.RequestException, ValueError):
        # Network failure or non-JSON body: best-effort lookup, report "not found".
        return None

    # GraphQL may answer with "data": null (or null sub-objects) on errors,
    # so every level is checked before descending.
    data = body.get("data") if isinstance(body, dict) else None
    genes = data.get("genes") if isinstance(data, dict) else None
    nodes = genes.get("nodes") if isinstance(genes, dict) else None
    if isinstance(nodes, list) and nodes and isinstance(nodes[0], dict):
        return nodes[0].get("id")
    return None
[docs]
def _get_variants_for_gene_id(
    self, gene_id: int, limit: int = 500
) -> Dict[str, Any]:
    """Fetch the variants of one CIViC gene via cursor-paginated GraphQL.

    Feature-45A-01: CIViC caps ``variants(first:)`` at 100 per request, so
    pages are requested one after another until ``limit`` variants have been
    collected or the server reports no further page.

    Args:
        gene_id: CIViC gene ID to query.
        limit: Maximum number of variants to return. ``None`` falls back to
            500; any other value must be convertible to ``int``.

    Returns:
        On success::

            {"status": "success",
             "data": {"gene": {"id": ..., "name": ..., "variants": {"nodes": [...]}}},
             "metadata": {"source": "CIViC", "format": "GraphQL", ["note": ...]}}

        ``metadata["note"]`` is present when several distinct variant records
        share a name. On failure ``{"status": "error", "error": ...}`` (plus
        ``"errors"`` when the server returned GraphQL errors). The method
        does not raise for request or response problems.
    """
    from collections import Counter

    # Feature-41A-02: include feature { id name } so callers can distinguish
    # e.g. KRAS G12C (ID 78) from NRAS G12C (ID 897).
    PAGINATED_QUERY = (
        "query GetVariantsByGene($gene_id: Int!, $page_size: Int, $after: String) { "
        "gene(id: $gene_id) { id name variants(first: $page_size, after: $after) { "
        "nodes { id name ... on GeneVariant { feature { id name } } } "
        "pageInfo { hasNextPage endCursor } } } }"
    )
    PAGE_SIZE = 100  # CIViC server max per page

    # Validate the limit before any request is made.
    try:
        limit = 500 if limit is None else int(limit)
    except (TypeError, ValueError):
        return {
            "status": "error",
            "error": f"limit must be an integer, got {limit!r}",
        }

    all_nodes: list = []
    cursor = None
    gene_meta: Dict[str, Any] = {}
    try:
        while len(all_nodes) < limit:
            variables: Dict[str, Any] = {
                "gene_id": gene_id,
                "page_size": min(PAGE_SIZE, limit - len(all_nodes)),
            }
            if cursor:
                variables["after"] = cursor
            resp = requests.post(
                CIVIC_GRAPHQL_URL,
                json={
                    "query": PAGINATED_QUERY,
                    "operationName": "GetVariantsByGene",
                    "variables": variables,
                },
                timeout=30,
                headers={
                    "Content-Type": "application/json",
                    "Accept": "application/json",
                },
            )
            resp.raise_for_status()
            body = resp.json() or {}

            if body.get("errors"):
                return {
                    "status": "error",
                    "error": "GraphQL query errors",
                    "errors": body["errors"],
                }

            # "data" and "gene" may both be null (e.g. unknown gene ID).
            gene_data = (body.get("data") or {}).get("gene")
            if not gene_data:
                if all_nodes:
                    break  # keep what earlier pages already delivered
                return {
                    "status": "error",
                    "error": f"Gene ID {gene_id} not found in CIViC database",
                }

            if not gene_meta:
                gene_meta = {
                    "id": gene_data.get("id"),
                    "name": gene_data.get("name"),
                }

            variants_block = gene_data.get("variants") or {}
            page_nodes = variants_block.get("nodes") or []
            all_nodes.extend(page_nodes)

            page_info = variants_block.get("pageInfo") or {}
            cursor = page_info.get("endCursor")
            # Stop on the last page, on a missing cursor, or on an empty page
            # (the latter guards against looping on a misbehaving server).
            if not page_info.get("hasNextPage") or not cursor or not page_nodes:
                break
    except Exception as e:
        # Boundary handler: callers expect an error dict, never an exception.
        return {"status": "error", "error": f"CIViC API request failed: {str(e)}"}

    # Feature-46B-01: deduplicate by variant ID (prevents pagination overlap artifacts)
    seen_ids: set = set()
    deduped: list = []
    for node in all_nodes:
        if not isinstance(node, dict):
            continue
        node_id = node.get("id")
        if node_id in seen_ids:
            continue
        seen_ids.add(node_id)
        # Strip leading/trailing whitespace from variant names (API artifact)
        if isinstance(node.get("name"), str):
            node["name"] = node["name"].strip()
        deduped.append(node)

    # Flag variant names that appear multiple times (distinct CIViC records)
    name_counts = Counter(node.get("name") or "" for node in deduped)
    duplicate_names = [n for n, c in name_counts.items() if n and c > 1]

    # Reassemble in the original single-request structure
    data = {
        "gene": {
            **gene_meta,
            "variants": {"nodes": deduped[:limit]},
        }
    }
    metadata: Dict[str, Any] = {"source": "CIViC", "format": "GraphQL"}
    if duplicate_names:
        metadata["note"] = (
            f"Multiple distinct CIViC variant records share the same name(s): "
            f"{', '.join(duplicate_names[:5])}. These are separate entries with different "
            f"IDs (e.g., from different molecular profiles or evidence contexts) — "
            f"use the variant ID to distinguish them."
        )
    return {
        "status": "success",
        "data": data,
        "metadata": metadata,
    }
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Execute the CIViC GraphQL API call for the configured tool.

    Behaviour depends on ``self.tool_config["name"]``:

    - ``civic_get_variants_by_gene``: resolves a gene symbol to a CIViC gene
      ID when ``gene_id`` is absent, then returns that gene's variants.
    - ``civic_search_evidence_items``: rejects unsupported or unrecognized
      parameters, normalizes ``therapy`` / ``status`` / ``molecular_profile``,
      runs the templated query and attaches notes and warnings (including
      best-effort probe queries when a filter combination returns nothing).
    - ``civic_search_variants``: with a gene symbol, fetches that gene's
      variants and filters them client-side by ``query`` / ``variant_name``;
      without one, runs the templated query.
    - any other name: runs the templated query as-is.

    Args:
        arguments: Caller-supplied tool arguments. The dict is never mutated;
            a copy is made before any key is rewritten.

    Returns:
        On success ``{"status": "success", "data": ..., "metadata": ...}``
        plus optional keys such as ``normalization_note``, ``warning``,
        ``therapy_warning``, ``hint``, ``status_note``, ``filter_note`` and
        ``note``. On failure a dict with ``"status": "error"`` and an
        ``"error"`` message (plus ``"errors"`` / ``"query"`` where available).
    """
    import re

    def _nodes_of(container: Any, key: str) -> list:
        """Return ``container[key]["nodes"]``; null or missing levels yield ``[]``."""
        section = (container or {}).get(key) or {}
        return section.get("nodes") or []

    def _name_of(node: Any, key: str) -> str:
        """Return ``node[key]["name"]``; null or missing levels yield ``""``."""
        return ((node or {}).get(key) or {}).get("name") or ""

    json_headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    legacy_params = ("gene", "variant", "gene_name", "molecular_profile_id")

    tool_name = self.tool_config.get("name", "")

    # ------------------------------------------------------------------
    # civic_get_variants_by_gene: resolve gene_name -> gene_id if needed
    # ------------------------------------------------------------------
    if tool_name == "civic_get_variants_by_gene":
        if not arguments.get("gene_id"):
            gene_name = (
                arguments.get("gene_name")
                or arguments.get("gene")
                or arguments.get("gene_symbol")  # Feature-47A-01
                or arguments.get("query")
            )
            if not gene_name:
                return {
                    "status": "error",
                    "error": "gene_id or gene_name is required for civic_get_variants_by_gene",
                }
            gene_id = self._lookup_gene_id(gene_name)
            if gene_id is None:
                return {
                    "status": "error",
                    "error": f"Gene '{gene_name}' not found in CIViC database",
                }
            arguments = dict(arguments)
            arguments["gene_id"] = gene_id
        # An explicit limit=None falls back to the default of 500.
        return self._get_variants_for_gene_id(
            arguments["gene_id"], arguments.get("limit") or 500
        )

    # ------------------------------------------------------------------
    # civic_search_evidence_items: parameter validation
    # Feature-40B-01: warn on unsupported gene/variant params.
    # Feature-41A-03: also catch molecular_profile_id (integer) — no GraphQL binding.
    # Feature-61A-001: catch ANY unrecognized parameter that would be silently
    # ignored, causing unfiltered evidence dumps (e.g. description="ESR1").
    # ------------------------------------------------------------------
    if tool_name == "civic_search_evidence_items":
        _known_params = {
            "molecular_profile",
            "disease",
            "disease_name",
            "therapy",
            "status",
            "limit",
            "evidence_type",
            "significance",
            "evidence_direction",
            "after",
            "operation",
        }
        # Also include names reachable via param_map and array_wrap
        _known_params.update(self.param_map.keys())
        _known_params.update(self.array_wrap.keys())

        # Legacy unsupported params get a specific hint.
        legacy_unsupported = [p for p in legacy_params if arguments.get(p)]
        # Anything else that is not recognized.
        other_unknown = [
            p
            for p in arguments
            if p not in _known_params and p not in legacy_params
        ]

        if legacy_unsupported:
            gene = arguments.get("gene") or arguments.get("gene_name")
            variant = arguments.get("variant")
            mol_id = arguments.get("molecular_profile_id")
            profile_hint = ""
            if gene and variant:
                profile_hint = f' Try: molecular_profile="{gene} {variant}"'
            elif gene:
                profile_hint = f' Try: molecular_profile="{gene}"'
            elif mol_id:
                profile_hint = (
                    " For integer ID filtering, use civic_get_evidence_item with the "
                    "evidence ID, or civic_get_variant with the variant ID."
                )
            return {
                "status": "error",
                "error": f"Unsupported parameter(s) for civic_search_evidence_items: {', '.join(legacy_unsupported)}. "
                "Supported filters: molecular_profile (string, e.g. 'BRAF V600E'), "
                "therapy, disease, status, evidence_type (PREDICTIVE, DIAGNOSTIC, "
                "PROGNOSTIC, PREDISPOSING, ONCOGENIC, FUNCTIONAL)." + profile_hint,
            }
        if other_unknown:
            return {
                "status": "error",
                "error": (
                    f"Unrecognized parameter(s) for civic_search_evidence_items:"
                    f" {', '.join(sorted(other_unknown))}. These are silently ignored by"
                    f" the CIViC GraphQL API, returning unfiltered results."
                    f" Supported filters: molecular_profile, disease, therapy, status,"
                    f" evidence_type, significance, limit."
                ),
            }

    # ------------------------------------------------------------------
    # civic_search_variants: if a gene is given, look up gene_id and fetch
    # its variants. Feature-41A-01: combined gene+query filters client-side.
    # ------------------------------------------------------------------
    if tool_name == "civic_search_variants":
        gene_name = (
            arguments.get("gene")
            or arguments.get("gene_name")
            or arguments.get("gene_symbol")  # Feature-47A-01
        )
        # Feature-53B-004: variant_name is accepted as a synonym of query.
        # Feature-54A-005: when BOTH are given and differ, apply AND logic
        # and say so in the result instead of silently dropping one.
        raw_query = arguments.get("query")
        raw_variant_name = arguments.get("variant_name") or arguments.get("variant")
        # Feature-66A-002: strip leading/trailing whitespace from inputs
        if raw_query and isinstance(raw_query, str):
            raw_query = raw_query.strip()
        if raw_variant_name and isinstance(raw_variant_name, str):
            raw_variant_name = raw_variant_name.strip()

        _both_provided = (
            raw_query and raw_variant_name and raw_query != raw_variant_name
        )
        if _both_provided:
            query_term = raw_query
            _secondary_term = raw_variant_name
        else:
            query_term = raw_query or raw_variant_name
            _secondary_term = None

        # Feature-66A-001: _build_graphql_query only reads arguments["query"],
        # so forward the effective (stripped) term there. This also covers a
        # user-supplied query that only differed by surrounding whitespace.
        if query_term and arguments.get("query") != query_term:
            arguments = dict(arguments)
            arguments["query"] = query_term

        if gene_name:
            gene_id = self._lookup_gene_id(gene_name)
            if gene_id is None:
                return {
                    "status": "error",
                    "error": f"Gene '{gene_name}' not found in CIViC database",
                }
            # Feature-43B-01: when gene+query are combined, always fetch up to
            # 500 variants before client-side filtering; the user's limit
            # applies to the OUTPUT, not the pre-filter fetch — otherwise
            # early variants may crowd out clinically important ones.
            user_limit = arguments.get("limit")
            fetch_limit = 500 if query_term else (user_limit or 500)
            result = self._get_variants_for_gene_id(gene_id, fetch_limit)

            # If a query was also provided, filter by name client-side.
            if query_term and isinstance(result.get("data"), dict):
                gene_data = result["data"].get("gene") or {}
                variants_block = gene_data.get("variants")
                if not isinstance(variants_block, dict):
                    variants_block = {}
                nodes = variants_block.get("nodes") or []

                q_lower = str(query_term).lower()
                filtered = [
                    v for v in nodes if q_lower in (v.get("name") or "").lower()
                ]
                # Feature-54A-005: AND logic when both terms were provided
                if _secondary_term:
                    sec_lower = str(_secondary_term).lower()
                    filtered = [
                        v
                        for v in filtered
                        if sec_lower in (v.get("name") or "").lower()
                    ]
                    result["filter_note"] = (
                        f"Both query='{raw_query}' and variant_name='{raw_variant_name}' "
                        f"were provided; applied AND logic (variants matching both terms)."
                    )
                # Truncate to the user-requested limit AFTER filtering
                if user_limit:
                    filtered = filtered[:user_limit]
                variants_block["nodes"] = filtered

                # Feature-48B-02: the duplicate-name note from the fetch covers
                # ALL gene variants; recompute it for the filtered set only.
                if "metadata" in result:
                    filtered_name_count: Dict[str, int] = {}
                    for v in filtered:
                        n = v.get("name") or ""
                        filtered_name_count[n] = filtered_name_count.get(n, 0) + 1
                    filtered_dups = [
                        n for n, c in filtered_name_count.items() if c > 1
                    ]
                    if filtered_dups:
                        result["metadata"]["note"] = (
                            f"Multiple distinct CIViC variant records share the same name(s): "
                            f"{', '.join(filtered_dups[:5])}. These are separate entries — "
                            f"use the variant ID to distinguish them."
                        )
                    else:
                        result["metadata"].pop("note", None)

                # Feature-43A-04 / Feature-44A-01: on an empty filter result,
                # explain why and suggest terms CIViC actually uses.
                if not filtered:
                    fusion_hint = ""
                    alt_hint = ""
                    if "fusion" in q_lower:
                        # Feature-56A-006: validated example fusions, stored as
                        # full 5'::3' pairs so the example keeps the gene order
                        # used in names such as "BCR::ABL1 Fusion".
                        _FUSION_EXAMPLES = {
                            "ALK": "EML4::ALK",
                            "ROS1": "CD74::ROS1",
                            "RET": "KIF5B::RET",
                            "NTRK1": "TPM3::NTRK1",
                            "FGFR2": "FGFR2::BICC1",
                            "FGFR3": "FGFR3::TACC3",
                            "PDGFRA": "FIP1L1::PDGFRA",
                            "BCR": "BCR::ABL1",
                            "ABL1": "BCR::ABL1",
                        }
                        # Case-insensitive lookup: users may pass "alk".
                        _known_fusion = _FUSION_EXAMPLES.get(
                            str(gene_name).strip().upper()
                        )
                        _example = (
                            f"'{_known_fusion} Fusion'"
                            if _known_fusion
                            else f"'{gene_name}::GENE2 Fusion'"
                        )
                        fusion_hint = (
                            f" CIViC stores fusion events as molecular profiles "
                            f"rather than gene variants. Try civic_search_evidence_items "
                            f"with molecular_profile='{gene_name}::PARTNER Fusion' "
                            f"(e.g., {_example})."
                        )
                    # Alternative query suggestions for common terms CIViC names differently
                    _alt_suggestions: Dict[str, str] = {
                        "truncat": "Try query='LOSS' or query='Loss-of-function' — CIViC uses these terms for truncating/LOF variants.",
                        "loss of function": "Try query='LOSS' or query='Loss-of-function'.",
                        "lof": "Try query='LOSS' or query='Loss-of-function'.",
                        "amplif": "Try query='AMPLIFICATION'.",
                        "delet": "Try query='DELETION' or query='LOSS'.",
                        "overexpress": "Try query='OVEREXPRESSION'.",
                        "missense": "Try query='V600E' or another specific amino acid change — CIViC indexes by specific variant names.",
                    }
                    for term, suggestion in _alt_suggestions.items():
                        if term in q_lower:
                            alt_hint = f" {suggestion}"
                            break
                    # Show available variant names to guide the user
                    available_names = [v.get("name") or "" for v in nodes[:10]]
                    available_str = (
                        f" Available {gene_name} variant names include: {', '.join(available_names[:8])}."
                        if available_names
                        else ""
                    )
                    result["note"] = (
                        f"No variants found matching '{query_term}' in {gene_name}."
                        + fusion_hint
                        + alt_hint
                        + available_str
                    )
            return result

    # ------------------------------------------------------------------
    # Input normalization. Track what changed so it can be disclosed in the
    # result (Feature-55A-008).
    # ------------------------------------------------------------------
    _therapy_normalized_from = None
    _mp_normalized_from = None

    # Feature-53B-002: CIViC therapy names are case-sensitive (Title Case,
    # e.g. "Erdafitinib"). Normalize all-lowercase / all-uppercase input.
    # Feature-63B-002: status is a strict upper-case GraphQL enum.
    if tool_name == "civic_search_evidence_items":
        therapy = arguments.get("therapy")
        if therapy and isinstance(therapy, str):
            if therapy == therapy.lower() or therapy == therapy.upper():
                _therapy_normalized_from = therapy
                arguments = dict(arguments)
                arguments["therapy"] = therapy.title()
        status_val = arguments.get("status")
        if (
            status_val
            and isinstance(status_val, str)
            and status_val != status_val.upper()
        ):
            arguments = dict(arguments)
            arguments["status"] = status_val.upper()

    # Feature-55B-005: CIViC writes fusions with a double colon
    # ("BCR::ABL1 Fusion"); hyphenated input silently returns 0 results.
    # Feature-56A-001: do not rewrite GENE-Mutation input (EGFR-T790M):
    # skip when the second part looks like a protein change (letter +
    # digits + optional letter/asterisk).
    if tool_name in ("civic_search_evidence_items", "civic_search_variants"):
        mol_profile = arguments.get("molecular_profile")
        if mol_profile and isinstance(mol_profile, str):

            def _maybe_fuse(m: "re.Match") -> str:
                """Replace GENE1-GENE2 with GENE1::GENE2, but not GENE-MutationNotation."""
                second = m.group(2)
                if re.match(r"^[A-Z]\d+[A-Z*]?$", second):
                    return m.group(0)  # leave unchanged
                return m.group(1) + "::" + second

            normalized_mp = re.sub(
                r"\b([A-Z][A-Z0-9]*)-([A-Z][A-Z0-9]+)\b",
                _maybe_fuse,
                mol_profile,
            )
            if normalized_mp != mol_profile:
                _mp_normalized_from = mol_profile
                arguments = dict(arguments)
                arguments["molecular_profile"] = normalized_mp

    # ------------------------------------------------------------------
    # Main GraphQL request and result annotation
    # ------------------------------------------------------------------
    try:
        payload = self._build_graphql_query(arguments)
        response = requests.post(
            CIVIC_GRAPHQL_URL,
            json=payload,
            timeout=self.timeout,
            headers={**json_headers, "User-Agent": "ToolUniverse/CIViC"},
        )
        response.raise_for_status()
        data = response.json()

        # Check for GraphQL errors
        if "errors" in data:
            return {
                "status": "error",
                "error": "GraphQL query errors",
                "errors": data["errors"],
                "query": arguments,
            }

        result = {
            "status": "success",
            # "data" may be null in the response; normalize to a dict.
            "data": data.get("data") or {},
            "metadata": {
                "source": "CIViC (Clinical Interpretation of Variants in Cancer)",
                "format": "GraphQL",
                "endpoint": CIVIC_GRAPHQL_URL,
            },
        }

        # Feature-55A-008 / Feature-55B-005: disclose input normalizations.
        _norm_parts = []
        if _therapy_normalized_from:
            _norm_parts.append(
                f"therapy '{_therapy_normalized_from}' → '{arguments.get('therapy')}' (CIViC uses Title Case)"
            )
        if _mp_normalized_from:
            _norm_parts.append(
                f"molecular_profile '{_mp_normalized_from}' → '{arguments.get('molecular_profile')}'"
                " (CIViC uses double-colon '::' for fusion gene pairs)"
            )
        if _norm_parts:
            result["normalization_note"] = (
                "Input auto-normalized: " + "; ".join(_norm_parts) + "."
            )

        if tool_name == "civic_search_evidence_items":
            mol_profile = arguments.get("molecular_profile")
            disease = arguments.get("disease") or arguments.get("disease_name")
            therapy = arguments.get("therapy")
            evidence_nodes = _nodes_of(result["data"], "evidenceItems")
            # Feature-59A-001: the active status filter, for disclosure below.
            _status_used = arguments.get(
                "status", self.variable_defaults.get("status", "ACCEPTED")
            )
            _accepted_only = str(_status_used).upper() == "ACCEPTED"

            # Feature-63B-002: CIViC matches molecularProfileName by
            # substring, so compound profiles can appear in the results.
            # Disclose non-exact matches so users can confirm relevance.
            if mol_profile and evidence_nodes:
                _mp_lower = mol_profile.lower()
                _non_exact_profiles = {
                    name
                    for name in (
                        _name_of(node, "molecularProfile")
                        for node in evidence_nodes
                    )
                    if name and name.lower() != _mp_lower
                }
                if _non_exact_profiles:
                    _unique_non_exact = sorted(_non_exact_profiles)[:3]
                    result["molecular_profile_match_note"] = (
                        f"CIViC uses substring/contains matching for molecular_profile "
                        f"— results include any profile whose name contains "
                        f"'{mol_profile}' as a substring, not only exact matches. "
                        f"Non-exact profiles in these results: "
                        + ", ".join(f"'{p}'" for p in _unique_non_exact)
                        + ". Review the molecularProfile.name field in each result to "
                        "confirm clinical relevance."
                    )

            # Feature-50A-001 / Feature-57A-005: any disease filter with 0 results.
            # Feature-52A-004: probe without the disease filter to surface the
            # disease names CIViC actually has evidence for.
            if disease and not evidence_nodes:
                actual_diseases: list = []
                probe_nodes: list = []
                try:
                    probe_args = {
                        k: v
                        for k, v in arguments.items()
                        if k not in ("disease", "disease_name")
                    }
                    probe_args["limit"] = 50
                    probe_resp = requests.post(
                        CIVIC_GRAPHQL_URL,
                        json=self._build_graphql_query(probe_args),
                        timeout=self.timeout,
                        headers=json_headers,
                    )
                    probe_nodes = _nodes_of(
                        (probe_resp.json() or {}).get("data"), "evidenceItems"
                    )
                    # Evidence items may carry a null disease; skip those.
                    actual_diseases = sorted(
                        {_name_of(node, "disease") for node in probe_nodes} - {""}
                    )
                except Exception:
                    # Best-effort probe: the hint is optional, the main
                    # result must still be returned.
                    actual_diseases, probe_nodes = [], []

                # Build context string for hint message
                if mol_profile:
                    _ctx = f"molecular_profile='{mol_profile}'"
                elif therapy:
                    _ctx = f"therapy='{therapy}'"
                else:
                    _ctx = "the specified filter"
                if actual_diseases:
                    disease_hint = (
                        f" CIViC has {len(probe_nodes)} evidence items for "
                        f"{_ctx} across these diseases: "
                        + ", ".join(f"'{d}'" for d in actual_diseases[:10])
                        + ". Use one of these exact disease names."
                    )
                else:
                    disease_hint = (
                        f" Try retrying with {_ctx} "
                        "(remove the disease filter) to see all evidence."
                    )
                _status_note = ""
                if _accepted_only:
                    _status_note = (
                        " CIViC defaults to ACCEPTED evidence only — "
                        "add status='SUBMITTED' to include pre-review evidence."
                    )
                result["warning"] = (
                    f"No evidence items found for {_ctx} "
                    f"AND disease='{disease}'. CIViC applies AND logic across all "
                    "filters, and disease names must match CIViC's exact taxonomy "
                    "(e.g., 'Lung Non-small Cell Carcinoma' not 'NSCLC' or "
                    "'Non-small Cell Lung Carcinoma', "
                    "'Chronic Myelogenous Leukemia, BCR-ABL1+' not 'CML', "
                    "'Pancreatic Ductal Carcinoma' not 'Pancreatic Adenocarcinoma')."
                    + disease_hint
                    + _status_note
                )

            # Feature-56A-002: molecular_profile alone returned nothing.
            if mol_profile and not disease and not therapy and not evidence_nodes:
                mp_warn = f"No evidence items found for molecular_profile='{mol_profile}'."
                if _accepted_only:
                    mp_warn += (
                        " CIViC defaults to ACCEPTED (peer-reviewed) evidence only. "
                        "If this variant has recent or emerging evidence it may be "
                        "SUBMITTED (pre-review) — add status='SUBMITTED' to include it."
                    )
                if _mp_normalized_from:
                    mp_warn += (
                        f" Note: your input '{_mp_normalized_from}' was auto-normalized"
                        f" to '{mol_profile}' as a gene fusion. If this is a point"
                        " mutation (e.g., EGFR T790M), use space-separated notation"
                        " instead (CIViC does not use hyphens for mutations)."
                    )
                elif re.search(
                    r"\b[A-Z][A-Z0-9]*-[A-Z]\d+[A-Z*]?\b", mol_profile
                ):
                    # Input looks like GENE-Mutation (e.g., EGFR-T790M): it was
                    # deliberately left alone above, so suggest the
                    # space-separated form.
                    space_form = mol_profile.replace("-", " ", 1)
                    mp_warn += (
                        f" If '{mol_profile}' is a point mutation, try"
                        f" molecular_profile='{space_form}' (CIViC uses"
                        " 'GENE Mutation' with a space, not a hyphen)."
                    )
                result["warning"] = mp_warn

            # Feature-53B-002 / Feature-54A-001: molecular_profile+therapy
            # returned nothing; probe without the therapy filter to list the
            # therapy names that do have evidence.
            if mol_profile and therapy and not disease and not evidence_nodes:
                available_therapies: list = []
                try:
                    probe_args = {
                        k: v for k, v in arguments.items() if k != "therapy"
                    }
                    probe_args["limit"] = 50
                    probe_resp = requests.post(
                        CIVIC_GRAPHQL_URL,
                        json=self._build_graphql_query(probe_args),
                        timeout=self.timeout,
                        headers=json_headers,
                    )
                    therapy_probe_nodes = _nodes_of(
                        (probe_resp.json() or {}).get("data"), "evidenceItems"
                    )
                    available_therapies = sorted(
                        {
                            t.get("name")
                            for node in therapy_probe_nodes
                            for t in ((node or {}).get("therapies") or [])
                            if t and t.get("name")
                        }
                    )
                except Exception:
                    # Best-effort probe, see above.
                    available_therapies = []

                if available_therapies:
                    therapy_hint = (
                        f" CIViC has evidence for '{mol_profile}' with these "
                        f"therapies: "
                        + ", ".join(f"'{t}'" for t in available_therapies[:10])
                        + ". Use one of these exact therapy names."
                    )
                else:
                    therapy_hint = (
                        f" Try removing the therapy filter and searching only by "
                        f"molecular_profile='{mol_profile}' to see all available evidence."
                    )
                # Only claim auto-normalization when it actually happened.
                _normalized_clause = (
                    " The therapy name was auto-normalized "
                    "to Title Case, but may still not match CIViC's exact entry."
                    if _therapy_normalized_from
                    else ""
                )
                result["therapy_warning"] = (
                    f"No evidence items found for molecular_profile='{mol_profile}' "
                    f"AND therapy='{therapy}'. CIViC therapy names are exact-match "
                    "and case-sensitive (stored as Title Case, e.g., 'Erdafitinib', "
                    "'Trastuzumab', 'Lapatinib')."
                    + _normalized_clause
                    + therapy_hint
                )

            # Feature-60A-001: when results ARE returned under the
            # ACCEPTED-only filter, disclose that SUBMITTED items may exist.
            if evidence_nodes and _accepted_only:
                result["status_note"] = (
                    f"Showing {len(evidence_nodes)} ACCEPTED (peer-reviewed) evidence"
                    " items. Additional SUBMITTED (pre-review) items may exist —"
                    " add status='SUBMITTED' to include them."
                )

        # Feature-67B-002: "GENE VARIANT" passed as the variant name returns
        # nothing because CIViC stores variants without the gene prefix
        # (e.g. "L858R", not "EGFR L858R").
        if tool_name == "civic_search_variants":
            if not _nodes_of(result["data"], "variants"):
                _raw_vn = (
                    arguments.get("variant_name")
                    or arguments.get("variant")
                    or arguments.get("query")
                    or ""
                )
                if _raw_vn and re.match(r"^[A-Z][A-Z0-9]+\s+\S", str(_raw_vn)):
                    _vn_parts = str(_raw_vn).split(None, 1)
                    result["hint"] = (
                        f"No variants found for '{_raw_vn}'. CIViC stores variants "
                        f"without the gene prefix — try gene_name='{_vn_parts[0]}' "
                        f"with variant_name='{_vn_parts[1]}'."
                    )

        return result
    except requests.RequestException as e:
        return {
            "status": "error",
            "error": f"CIViC API request failed: {str(e)}",
            "query": arguments,
        }
    except ValueError as e:
        # Raised by response.json() on a non-JSON body.
        return {"status": "error", "error": str(e), "query": arguments}
    except Exception as e:
        # Boundary handler: callers expect an error dict, never an exception.
        return {
            "status": "error",
            "error": f"Unexpected error: {str(e)}",
            "query": arguments,
        }