Source code for tooluniverse.pharmgkb_tool
"""
PharmGKB API tool for ToolUniverse.
PharmGKB is a comprehensive resource that curates knowledge about the impact
of genetic variation on drug response for clinicians and researchers.
API Documentation: https://api.pharmgkb.org/v1/
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
from .http_utils import request_with_retry
# Base URL for PharmGKB/ClinPGx REST API
PHARMGKB_BASE_URL = "https://api.clinpgx.org/v1"
[docs]
@register_tool("PharmGKBTool")
class PharmGKBTool(BaseTool):
"""
Tool for querying PharmGKB REST API.
PharmGKB provides pharmacogenomics data:
- Drug-gene-variant clinical annotations
- CPIC dosing guidelines
- Drug and gene details
- Pharmacogenetic pathways
No authentication required for most endpoints.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
    """Set up the tool from its configuration mapping.

    Args:
        tool_config: Tool configuration. The optional ``timeout`` entry
            (default 30) is passed to every HTTP request, and the optional
            ``fields.operation`` entry (default ``"search_drugs"``) selects
            which handler ``run`` dispatches to.
    """
    super().__init__(tool_config)
    field_settings = tool_config.get("fields", {})
    self.timeout = tool_config.get("timeout", 30)
    self.operation = field_settings.get("operation", "search_drugs")
    # One session is kept on the instance and reused for every request.
    self.session = requests.Session()
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Dispatch ``arguments`` to the handler for the configured operation.

    Args:
        arguments: Caller-supplied parameters, passed through unchanged to
            the selected handler.

    Returns:
        Whatever the selected handler returns, or the standard error
        envelope when ``self.operation`` is not a known operation name.
    """
    requested = self.operation
    # (operation name, handler method name, entity type passed first or None)
    routes = (
        ("search_drugs", "_search_entity", "Chemical"),
        ("drug_details", "_get_entity_details", "Chemical"),
        ("search_genes", "_search_entity", "Gene"),
        ("gene_details", "_get_entity_details", "Gene"),
        ("clinical_annotations", "_get_clinical_annotations", None),
        ("search_variants", "_search_entity", "Variant"),
        ("dosing_guidelines", "_get_dosing_guidelines", None),
        ("drug_label_annotations", "_get_drug_label_annotations", None),
        ("pathway", "_get_pathway", None),
        ("variant_annotations", "_get_variant_annotations", None),
    )
    for operation_name, handler_name, entity_type in routes:
        if requested == operation_name:
            # Resolve the handler only once the route has matched.
            handler = getattr(self, handler_name)
            if entity_type is None:
                return handler(arguments)
            return handler(entity_type, arguments)
    return self._error(f"Unknown operation: {requested}")
[docs]
def _error(self, message: str) -> Dict[str, Any]:
return {"status": "error", "error": message}
[docs]
def _request_json(
    self, url: str, params: Dict[str, Any]
) -> tuple[int, Dict[str, Any], str]:
    """Send a GET request and decode the JSON body.

    Args:
        url: Absolute endpoint URL.
        params: Query-string parameters for the request.

    Returns:
        A ``(status_code, payload, error)`` tuple.

        * ``status_code`` is the HTTP status, or ``0`` when the request
          itself raised ``requests.RequestException``.
        * ``payload`` is always a dict. A non-empty top-level JSON list is
          wrapped as ``{"data": [...]}`` so callers can use ``.get``; a
          body that is not JSON, is empty, or is a bare scalar becomes
          ``{}``.
        * ``error`` is ``""`` on success and a readable message otherwise.
    """
    try:
        response = request_with_retry(
            self.session,
            "GET",
            url,
            params=params,
            timeout=self.timeout,
            max_attempts=4,
            backoff_seconds=0.75,
        )
    except requests.RequestException as e:
        return 0, {}, f"PharmGKB API request failed: {str(e)}"

    try:
        decoded = response.json()
    except ValueError:
        decoded = None

    # Normalise to a dict so the declared return type holds for callers.
    if isinstance(decoded, dict):
        payload = decoded
    elif isinstance(decoded, list) and decoded:
        payload = {"data": decoded}
    else:
        payload = {}

    if response.status_code >= 400:
        # Fall back to the raw body when the JSON carries no "error" entry;
        # otherwise a non-JSON error page would be reported as "None".
        detail = payload.get("error") or response.text[:200]
        return (
            response.status_code,
            payload,
            f"PharmGKB API error {response.status_code}: {detail}",
        )

    if not payload:
        return (
            response.status_code,
            payload,
            "PharmGKB API returned non-JSON or empty response",
        )
    return response.status_code, payload, ""
[docs]
def _search_entity(
self, entity_type: str, arguments: Dict[str, Any]
) -> Dict[str, Any]:
"""Search for drugs, genes, or variants."""
query = (
arguments.get("query")
or arguments.get("drug_name")
or arguments.get("name")
or arguments.get("drug")
or ""
)
if not query:
return self._error("query parameter is required")
params = {"view": "base"}
if entity_type == "Gene":
params["symbol"] = query
else:
params["name"] = query
status_code, api_response, error = self._request_json(
f"{PHARMGKB_BASE_URL}/data/{entity_type.lower()}",
params,
)
if status_code == 404 and entity_type == "Variant":
# Gene name passed to variant search — try gene lookup as fallback
_, gene_resp, gene_err = self._request_json(
f"{PHARMGKB_BASE_URL}/data/gene",
{"symbol": query, "view": "base"},
)
if not gene_err:
gene_data = gene_resp.get("data", [])
if gene_data:
gene_id = gene_data[0].get("id", "")
return {
"status": "success",
"data": [],
"note": (
f"'{query}' is a gene symbol, not a variant rsID. "
f"PharmGKB gene found: {gene_id}. "
f"Use PharmGKB_get_gene_details with gene_id='{gene_id}' "
f"for gene-level pharmacogenomics data, or search with an rsID "
f"(e.g., 'rs1065852') for a specific variant."
),
}
return {
"status": "success",
"data": [],
"note": f"No variants found matching '{query}'. Use an rsID (e.g., 'rs1065852') to search for a specific variant.",
}
if status_code == 404:
return {"status": "success", "data": []}
if error:
return self._error(error)
results = api_response.get("data", api_response)
return {"status": "success", "data": results}
[docs]
def _get_entity_details(
self, entity_type: str, arguments: Dict[str, Any]
) -> Dict[str, Any]:
"""Get details for a specific entity by PharmGKB ID."""
# Handle both chemical_id and drug_id interchangeably
if entity_type == "Chemical":
entity_id = (
arguments.get("chemical_id")
or arguments.get("drug_id")
or arguments.get("id")
)
else:
entity_id = arguments.get(f"{entity_type.lower()}_id") or arguments.get(
"id"
)
if not entity_id:
return self._error(f"{entity_type.lower()}_id parameter is required")
_, api_response, error = self._request_json(
f"{PHARMGKB_BASE_URL}/data/{entity_type.lower()}/{entity_id}",
{"view": "base"},
)
if error:
return self._error(error)
result = api_response.get("data", api_response)
return {"status": "success", "data": result}
[docs]
def _get_clinical_annotations(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get clinical annotations. Best retrieved by specific ID or filtered."""
annotation_id = arguments.get("annotation_id")
if annotation_id:
_, api_response, error = self._request_json(
f"{PHARMGKB_BASE_URL}/data/clinicalAnnotation/{annotation_id}",
{"view": "base"},
)
if error:
return self._error(error)
result = api_response.get("data", api_response)
return {"status": "success", "data": result}
# Feature-121A-001: auto-resolve gene symbol to PharmGKB PA ID for a targeted URL.
# The API rejects relatedGenes.id filter (HTTP 400); provide a direct page URL instead.
gene_symbol = arguments.get("gene") or arguments.get("gene_symbol")
gene_id = arguments.get("gene_id")
if gene_symbol and not gene_id:
_, gene_resp, err = self._request_json(
f"{PHARMGKB_BASE_URL}/data/gene",
{"symbol": gene_symbol, "view": "min"},
)
if not err:
genes = (
gene_resp.get("data", [])
if isinstance(gene_resp.get("data"), list)
else []
)
if genes:
gene_id = genes[0].get("id", "")
if gene_symbol or gene_id:
target_id = gene_id or gene_symbol
url = (
f"https://www.pharmgkb.org/gene/{target_id}/clinicalAnnotation"
if gene_id
else f"https://www.pharmgkb.org/gene?symbol={gene_symbol}"
)
return self._error(
f"PharmGKB API does not support listing annotations by gene symbol. "
f"Browse {url} to find annotation IDs, then call with annotation_id=<id>. "
f"For drug-gene dosing guidelines, use CPIC_list_guidelines instead."
)
return self._error(
"annotation_id is required (e.g., '1447954390'). "
"Browse https://www.pharmgkb.org/clinicalAnnotation to find annotation IDs, "
"or use CPIC_get_guidelines for drug-gene dosing recommendations."
)
[docs]
def _get_dosing_guidelines(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get CPIC/DPWG dosing guidelines."""
guideline_id = arguments.get("guideline_id")
if guideline_id:
_, api_response, error = self._request_json(
f"{PHARMGKB_BASE_URL}/data/guideline/{guideline_id}",
{"view": "base"},
)
if error:
return self._error(error)
result = api_response.get("data", api_response)
return {"status": "success", "data": result}
# Feature-67A-003: relatedGenes.symbol filter returns HTTP 400 from PharmGKB API.
# Return an error directing users to look up the guideline_id first.
gene_symbol = arguments.get("gene") or arguments.get("gene_id")
if gene_symbol:
return self._error(
f"PharmGKB does not support gene-based guideline lookup. "
f"Use PharmGKB_search_genes to find gene '{gene_symbol}', then use the "
f"returned guideline IDs with guideline_id parameter."
)
return self._error(
"guideline_id is required. Use PharmGKB_search_genes or PharmGKB_search_drugs "
"to find relevant guideline IDs."
)
[docs]
@staticmethod
def _is_no_results(status_code: int) -> bool:
"""HTTP 404 from list endpoints means 'no matches', not a hard failure."""
return status_code == 404
[docs]
def _get_drug_label_annotations(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get FDA/EMA/HCSC/PMDA drug-label PGx annotations.
Two modes:
- By ID: pass label_id (e.g., 'PA166114907') for a single full annotation.
- By source: pass source (FDA/EMA/HCSC/PMDA, default FDA) to list
available label annotations (id + name), capped by limit.
"""
label_id = arguments.get("label_id") or arguments.get("id")
if label_id:
status_code, api_response, error = self._request_json(
f"{PHARMGKB_BASE_URL}/data/label/{label_id}",
{"view": "base"},
)
if self._is_no_results(status_code):
return {
"status": "success",
"data": [],
"note": f"No drug-label annotation found for id '{label_id}'.",
}
if error:
return self._error(error)
result = api_response.get("data", api_response)
return {"status": "success", "data": result}
source = (arguments.get("source") or "FDA").upper()
valid_sources = {"FDA", "EMA", "HCSC", "PMDA"}
if source not in valid_sources:
return self._error(
f"Invalid source '{source}'. Use one of: {', '.join(sorted(valid_sources))}."
)
try:
limit = int(arguments.get("limit", 50))
except (TypeError, ValueError):
limit = 50
limit = max(1, min(limit, 500))
status_code, api_response, error = self._request_json(
f"{PHARMGKB_BASE_URL}/data/label",
{"source": source, "view": "min"},
)
if self._is_no_results(status_code):
return {
"status": "success",
"data": [],
"note": f"No drug-label annotations found for source '{source}'.",
}
if error:
return self._error(error)
results = api_response.get("data", [])
if not isinstance(results, list):
results = [results]
total = len(results)
truncated = results[:limit]
out: Dict[str, Any] = {"status": "success", "data": truncated}
if total > limit:
out["note"] = (
f"Showing {limit} of {total} {source} drug-label annotations. "
f"Increase 'limit' or call with label_id=<id> for full details."
)
return out
[docs]
def _get_pathway(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get a pharmacogenomic pathway by PharmGKB pathway ID (e.g., PA145011113)."""
pathway_id = arguments.get("pathway_id") or arguments.get("id")
if not pathway_id:
return self._error(
"pathway_id is required (e.g., 'PA145011113' for Warfarin Pathway, "
"Pharmacokinetics). Browse https://www.pharmgkb.org/pathways to find "
"pathway IDs."
)
status_code, api_response, error = self._request_json(
f"{PHARMGKB_BASE_URL}/data/pathway/{pathway_id}",
{"view": "base"},
)
if self._is_no_results(status_code):
return {
"status": "success",
"data": [],
"note": f"No pathway found for id '{pathway_id}'.",
}
if error:
return self._error(error)
result = api_response.get("data", api_response)
return {"status": "success", "data": result}
[docs]
def _get_variant_annotations(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get variant-level literature annotations filtered by gene or chemical.
Provide exactly one of:
- gene_id: PharmGKB Gene Accession ID (e.g., 'PA126' for CYP2C9),
queried via location.genes.accessionId.
- chemical_id: PharmGKB Chemical ID (e.g., 'PA451906' for warfarin),
queried via relatedChemicals.accessionId.
"""
gene_id = arguments.get("gene_id") or arguments.get("gene_accession_id")
chemical_id = (
arguments.get("chemical_id")
or arguments.get("drug_id")
or arguments.get("chemical_accession_id")
)
if gene_id:
param_key = "location.genes.accessionId"
param_value = gene_id
elif chemical_id:
param_key = "relatedChemicals.accessionId"
param_value = chemical_id
else:
return self._error(
"gene_id (e.g., 'PA126' for CYP2C9) or chemical_id "
"(e.g., 'PA451906' for warfarin) is required. Use "
"PharmGKB_search_genes / PharmGKB_search_drugs to resolve a "
"name to its PharmGKB Accession ID."
)
try:
limit = int(arguments.get("limit", 50))
except (TypeError, ValueError):
limit = 50
limit = max(1, min(limit, 500))
status_code, api_response, error = self._request_json(
f"{PHARMGKB_BASE_URL}/data/variantAnnotation",
{param_key: param_value, "view": "min"},
)
if self._is_no_results(status_code):
return {
"status": "success",
"data": [],
"note": f"No variant annotations found for {param_key}={param_value}.",
}
if error:
return self._error(error)
results = api_response.get("data", [])
if not isinstance(results, list):
results = [results]
total = len(results)
truncated = results[:limit]
out: Dict[str, Any] = {"status": "success", "data": truncated}
if total > limit:
out["note"] = (
f"Showing {limit} of {total} variant annotations for "
f"{param_key}={param_value}. Increase 'limit' for more."
)
return out