Source code for tooluniverse.clinicaltrials_tool

"""
ClinicalTrials.gov REST API v2 tool for ToolUniverse.

ClinicalTrials.gov is the world's largest registry of clinical research studies,
maintained by the U.S. National Library of Medicine. It covers 572,000+ trials
across 200+ countries.

API: https://clinicaltrials.gov/data-api/api
No authentication required. Public access.
"""

import requests
from typing import Any

from .base_rest_tool import BaseRESTTool
from .tool_registry import register_tool

CLINICALTRIALS_BASE = "https://clinicaltrials.gov/api/v2"


@register_tool("CTGovAPITool")
class CTGovAPITool(BaseRESTTool):
    """
    Tool for querying the ClinicalTrials.gov API v2.

    Provides access to 572,000+ clinical trial records including:
    - Study protocol information (design, eligibility, interventions)
    - Recruitment status and enrollment data
    - Results and outcome measures
    - Sponsor and contact information

    The operation performed by :meth:`run` is fixed at construction time from
    ``tool_config["fields"]["operation"]`` and is one of ``search`` (the
    default), ``get_study``, ``stats_size`` or ``field_values``.

    Every call returns a plain dict: ``{"data": ..., "metadata": ...}`` on
    success, or ``{"error": "<message>"}`` on failure.

    No authentication required.
    """

    # Largest ``pageSize`` this tool will send to the search endpoint.
    _MAX_PAGE_SIZE = 1000

    # Study fields requested from the search endpoint; limiting the field
    # list keeps search responses small.
    _SEARCH_FIELDS = (
        "NCTId,BriefTitle,OfficialTitle,OverallStatus,StartDate,CompletionDate,"
        "StudyType,Phase,EnrollmentCount,Condition,InterventionName,LeadSponsorName,"
        "LocationCountry"
    )

    # (tool argument, API query parameter) pairs copied verbatim when the
    # argument is truthy. Order matters: ``intervention`` is an alias of
    # ``query_intr`` and, being later, wins when both are supplied.
    _SEARCH_PARAM_MAP = (
        ("query_cond", "query.cond"),
        ("query_intr", "query.intr"),
        ("query_term", "query.term"),
        ("intervention", "query.intr"),
        ("sponsor", "query.spons"),
        ("filter_status", "filter.overallStatus"),
        ("filter_study_type", "filter.studyType"),
        ("next_page_token", "pageToken"),
    )

    def __init__(self, tool_config: dict):
        """
        Initialise the tool.

        Args:
            tool_config: Tool configuration. ``tool_config["fields"]["operation"]``
                selects the operation; it defaults to ``"search"`` when the
                key (or the whole ``fields`` mapping) is missing or ``None``.
        """
        super().__init__(tool_config)
        self.timeout = 30
        # ``or {}`` also covers an explicit ``"fields": None`` in the config.
        fields = tool_config.get("fields") or {}
        self.operation = fields.get("operation", "search")

    def run(self, arguments: dict) -> dict:
        """
        Execute the ClinicalTrials.gov API call.

        Args:
            arguments: Operation-specific arguments (see the private
                ``_search_studies`` / ``_get_study`` / ``_get_field_values``
                methods). ``None`` is treated as an empty dict.

        Returns:
            The operation's result dict, or ``{"error": ...}`` if the request
            timed out, could not connect, returned an HTTP error status, or
            any other exception was raised. This method itself never raises
            ``Exception`` subclasses.
        """
        try:
            return self._query(arguments or {})
        except requests.exceptions.Timeout:
            return {
                "error": f"ClinicalTrials.gov request timed out after {self.timeout}s"
            }
        except requests.exceptions.ConnectionError:
            return {
                "error": "Failed to connect to ClinicalTrials.gov. Check network connectivity."
            }
        except requests.exceptions.HTTPError as e:
            # ``response`` can be None on a hand-constructed HTTPError; do not
            # let the error handler itself raise.
            status = getattr(e.response, "status_code", "unknown")
            return {"error": f"ClinicalTrials.gov HTTP error: {status}"}
        except Exception as e:
            return {"error": f"Unexpected error querying ClinicalTrials.gov: {str(e)}"}

    def _query(self, arguments: dict) -> dict:
        """Route to the endpoint handler selected by ``self.operation``."""
        op = self.operation
        if op == "search":
            return self._search_studies(arguments)
        if op == "get_study":
            return self._get_study(arguments)
        if op == "stats_size":
            return self._get_stats_size()
        if op == "field_values":
            return self._get_field_values(arguments)
        return {"error": f"Unknown operation: {op}"}

    def _get_json(self, path: str, params: Any = None) -> Any:
        """
        GET ``CLINICALTRIALS_BASE + path`` and return the decoded JSON body.

        Raises:
            requests.exceptions.RequestException: on timeout, connection
                failure or a non-2xx status (via ``raise_for_status``).
            ValueError: if the body is not valid JSON.
        """
        resp = requests.get(
            f"{CLINICALTRIALS_BASE}{path}", params=params, timeout=self.timeout
        )
        resp.raise_for_status()
        return resp.json()

    @staticmethod
    def _bounded_int(value: Any, default: int, minimum: int, maximum: int) -> int:
        """
        Convert *value* to ``int`` and clamp it to ``[minimum, maximum]``.

        ``None`` (argument omitted or passed as null) yields *default*.
        Values that ``int()`` cannot convert still raise, as before.
        """
        if value is None:
            value = default
        return max(minimum, min(int(value), maximum))

    @staticmethod
    def _parse_phases(phase_raw: Any) -> tuple:
        """
        Split a ``filter_phase`` argument into aggFilters option keys.

        Accepts a comma- or pipe-separated string (or a list/tuple) of
        ``PHASE<n>`` tokens, case-insensitive, spaces ignored; bare digits
        such as ``"2"`` are accepted too.

        Returns:
            ``(keys, rejected)``: *keys* is the de-duplicated list of digit
            strings in input order, *rejected* the tokens that could not be
            interpreted.
        """
        if isinstance(phase_raw, (list, tuple)):
            phase_raw = ",".join(str(item) for item in phase_raw)
        normalized = str(phase_raw).upper().replace(" ", "").replace("|", ",")

        keys = []
        rejected = []
        for token in normalized.split(","):
            if not token:
                continue
            number = token.replace("PHASE", "")
            if number.isascii() and number.isdigit():
                keys.append(number)
            else:
                rejected.append(token)
        return list(dict.fromkeys(keys)), rejected

    @staticmethod
    def _summarize_study(study: dict) -> dict:
        """Reduce one search-result study record to the flat summary dict."""
        proto = study.get("protocolSection", {})
        id_mod = proto.get("identificationModule", {})
        status_mod = proto.get("statusModule", {})
        design_mod = proto.get("designModule", {})
        cond_mod = proto.get("conditionsModule", {})
        interv_mod = proto.get("armsInterventionsModule", {})
        sponsor_mod = proto.get("sponsorCollaboratorsModule", {})

        intervention_names = [
            iv.get("name")
            for iv in interv_mod.get("interventions", [])
            if iv.get("name")
        ]
        return {
            "nct_id": id_mod.get("nctId"),
            "brief_title": id_mod.get("briefTitle"),
            "official_title": id_mod.get("officialTitle"),
            "status": status_mod.get("overallStatus"),
            "start_date": status_mod.get("startDateStruct", {}).get("date"),
            "completion_date": status_mod.get("completionDateStruct", {}).get("date"),
            "study_type": design_mod.get("studyType"),
            "phases": design_mod.get("phases", []),
            "enrollment": design_mod.get("enrollmentInfo", {}).get("count"),
            "conditions": cond_mod.get("conditions", []),
            # Only the first five intervention names are reported.
            "interventions": intervention_names[:5],
            "sponsor": sponsor_mod.get("leadSponsor", {}).get("name"),
        }

    def _search_studies(self, arguments: dict) -> dict:
        """
        Search for clinical trials with various filters.

        Recognised arguments (all optional): ``query_cond``, ``query_intr``,
        ``intervention`` (alias of ``query_intr``; wins if both are given),
        ``query_term``, ``sponsor``, ``filter_status``, ``filter_phase``
        (e.g. ``"PHASE2,PHASE3"``), ``filter_study_type``, ``page_size``
        (default 10, clamped to 1..1000) and ``next_page_token``.

        Returns:
            ``data`` with ``studies`` (list of summaries), ``total_count`` and
            ``next_page_token``; ``metadata`` with ``returned_count`` and, only
            when part of ``filter_phase`` was not understood, ``warnings``.
        """
        params: dict[str, Any] = {"format": "json"}
        warnings = []

        for arg_name, api_name in self._SEARCH_PARAM_MAP:
            if arguments.get(arg_name):
                params[api_name] = arguments[arg_name]

        if arguments.get("filter_phase"):
            # Phase is filtered through aggFilters rather than a filter.*
            # parameter. Per the API reference an aggFilters value is a
            # comma-separated list of "filter_id:space-separated option keys",
            # so several phases must be joined with spaces ("phase:2 3");
            # a comma would start a new filter group.
            phase_keys, rejected = self._parse_phases(arguments["filter_phase"])
            if phase_keys:
                params["aggFilters"] = f"phase:{' '.join(phase_keys)}"
            if rejected:
                # Unparseable tokens are still skipped (best effort), but the
                # caller is told instead of silently getting broader results.
                warnings.append(
                    f"Ignored unrecognized filter_phase value(s): {rejected}"
                )

        params["pageSize"] = self._bounded_int(
            arguments.get("page_size"),
            default=10,
            minimum=1,
            maximum=self._MAX_PAGE_SIZE,
        )
        params["fields"] = self._SEARCH_FIELDS

        data = self._get_json("/studies", params=params)
        studies = [self._summarize_study(s) for s in data.get("studies", [])]

        metadata: dict[str, Any] = {
            "source": "ClinicalTrials.gov",
            "api_version": "v2",
            "returned_count": len(studies),
        }
        if warnings:
            metadata["warnings"] = warnings

        return {
            "data": {
                "studies": studies,
                "total_count": data.get("totalCount"),
                "next_page_token": data.get("nextPageToken"),
            },
            "metadata": metadata,
        }

    @staticmethod
    def _is_valid_nct_id(nct_id: str) -> bool:
        """Return True for ``NCT`` (any case) followed by one or more ASCII digits."""
        return (
            nct_id.isascii()
            and nct_id[:3].upper() == "NCT"
            and nct_id[3:].isdigit()
        )

    def _get_study(self, arguments: dict) -> dict:
        """
        Get full details for a single study by NCT ID.

        Args:
            arguments: Must contain ``nct_id`` (e.g. ``"NCT04280705"``).

        Returns:
            ``{"error": ...}`` when ``nct_id`` is missing or malformed;
            otherwise ``data`` with the flattened study record (arms and
            interventions capped at 10 entries; primary outcomes, locations
            and references capped at 5) and ``metadata`` including
            ``has_results``.
        """
        raw_id = arguments.get("nct_id")
        nct_id = "" if raw_id is None else str(raw_id).strip()
        if not nct_id:
            return {"error": "nct_id parameter is required (e.g., 'NCT04280705')"}
        # The ID becomes a URL path segment; reject anything that is not a
        # plain NCT identifier so input such as "../stats/size" cannot
        # redirect the request to a different endpoint.
        if not self._is_valid_nct_id(nct_id):
            return {
                "error": (
                    f"Invalid nct_id '{nct_id}': expected 'NCT' followed by "
                    "digits (e.g., 'NCT04280705')"
                )
            }

        data = self._get_json(f"/studies/{nct_id}", params={"format": "json"})

        proto = data.get("protocolSection", {})
        id_mod = proto.get("identificationModule", {})
        status_mod = proto.get("statusModule", {})
        desc_mod = proto.get("descriptionModule", {})
        design_mod = proto.get("designModule", {})
        cond_mod = proto.get("conditionsModule", {})
        interv_mod = proto.get("armsInterventionsModule", {})
        outcomes_mod = proto.get("outcomesModule", {})
        elig_mod = proto.get("eligibilityModule", {})
        contacts_mod = proto.get("contactsLocationsModule", {})
        sponsor_mod = proto.get("sponsorCollaboratorsModule", {})
        refs_mod = proto.get("referencesModule", {})
        design_info = design_mod.get("designInfo", {})

        arms = [
            {
                "label": arm.get("label"),
                "type": arm.get("type"),
                "description": arm.get("description"),
                "intervention_names": arm.get("interventionNames", []),
            }
            for arm in interv_mod.get("armGroups", [])[:10]
        ]
        interventions = [
            {
                "name": iv.get("name"),
                "type": iv.get("type"),
                "description": iv.get("description"),
            }
            for iv in interv_mod.get("interventions", [])[:10]
        ]
        primary_outcomes = [
            {
                "measure": outcome.get("measure"),
                "description": outcome.get("description"),
                "time_frame": outcome.get("timeFrame"),
            }
            for outcome in outcomes_mod.get("primaryOutcomes", [])[:5]
        ]
        locations = [
            {
                "facility": loc.get("facility"),
                "city": loc.get("city"),
                "country": loc.get("country"),
                "status": loc.get("status"),
            }
            for loc in contacts_mod.get("locations", [])[:5]
        ]
        refs = [
            {"pmid": ref.get("pmid"), "citation": ref.get("citation")}
            for ref in refs_mod.get("references", [])[:5]
        ]

        return {
            "data": {
                "nct_id": id_mod.get("nctId"),
                "brief_title": id_mod.get("briefTitle"),
                "official_title": id_mod.get("officialTitle"),
                "organization": id_mod.get("organization", {}).get("fullName"),
                "status": status_mod.get("overallStatus"),
                "why_stopped": status_mod.get("whyStopped"),
                "start_date": status_mod.get("startDateStruct", {}).get("date"),
                "completion_date": status_mod.get("completionDateStruct", {}).get(
                    "date"
                ),
                "brief_summary": desc_mod.get("briefSummary"),
                "study_type": design_mod.get("studyType"),
                "phases": design_mod.get("phases", []),
                "enrollment": design_mod.get("enrollmentInfo", {}).get("count"),
                "allocation": design_info.get("allocation"),
                "masking": design_info.get("maskingInfo", {}).get("masking"),
                "primary_purpose": design_info.get("primaryPurpose"),
                "conditions": cond_mod.get("conditions", []),
                "keywords": cond_mod.get("keywords", []),
                "sponsor": sponsor_mod.get("leadSponsor", {}).get("name"),
                "eligibility_criteria": elig_mod.get("eligibilityCriteria"),
                "minimum_age": elig_mod.get("minimumAge"),
                "maximum_age": elig_mod.get("maximumAge"),
                "sex": elig_mod.get("sex"),
                "healthy_volunteers": elig_mod.get("healthyVolunteers"),
                "arms": arms,
                "interventions": interventions,
                "primary_outcomes": primary_outcomes,
                "locations": locations,
                "references": refs,
            },
            "metadata": {
                "nct_id": nct_id,
                "source": "ClinicalTrials.gov",
                "api_version": "v2",
                "has_results": data.get("resultsSection") is not None,
            },
        }

    def _get_stats_size(self) -> dict:
        """Get aggregate size statistics about the ClinicalTrials.gov database."""
        data = self._get_json("/stats/size")

        # NOTE(review): the v2 schema appears to name this field
        # "averageSizeBytes"; the previously used "averageByteSize" is kept
        # as a fallback. Confirm against a live response.
        average_size = data.get("averageSizeBytes")
        if average_size is None:
            average_size = data.get("averageByteSize")

        return {
            "data": {
                "total_studies": data.get("totalStudies") or data.get("studiesCount"),
                "average_byte_size": average_size,
                "byte_size_percentiles": data.get("percentiles", {}),
                "largest_studies": data.get("largestStudies", [])[:5],
            },
            "metadata": {
                "source": "ClinicalTrials.gov",
                "api_version": "v2",
            },
        }

    def _get_field_values(self, arguments: dict) -> dict:
        """
        Get value distribution for a specific field across studies.

        Args:
            arguments: ``field`` (required) is matched case-insensitively
                against each entry's ``piece`` name, exactly first and then as
                a substring; the first match is used. ``page_size`` (default
                50) caps the number of values returned; it cannot exceed the
                number of top values the API supplies, and negative values
                are treated as 0.

        Returns:
            ``{"error": ...}`` when ``field`` is missing or matches nothing
            (the message lists up to 20 available field names); otherwise
            ``data`` describing the field and its ``values``.
        """
        field = str(arguments.get("field") or "").strip()
        if not field:
            return {
                "error": "field parameter is required (e.g., 'Phase', 'OverallStatus')"
            }

        # The endpoint returns ALL fields - we filter client-side.
        payload = self._get_json("/stats/field/values")
        # Pair each entry with its lower-cased name once; tolerate entries
        # whose "piece" is missing or null.
        entries = [
            (str(entry.get("piece") or ""), entry)
            for entry in (payload if isinstance(payload, list) else [])
            if isinstance(entry, dict)
        ]

        field_lower = field.lower()
        matching = [e for piece, e in entries if piece.lower() == field_lower]
        if not matching:
            # Fall back to a substring match.
            matching = [e for piece, e in entries if field_lower in piece.lower()]

        if not matching:
            available = sorted({piece for piece, _ in entries if piece})[:20]
            return {
                "error": f"Field '{field}' not found. Available fields include: {available}"
            }

        field_obj = matching[0]
        top_values = field_obj.get("topValues") or []
        # minimum=0 keeps a negative page_size from slicing from the end.
        limit = self._bounded_int(
            arguments.get("page_size"),
            default=50,
            minimum=0,
            maximum=len(top_values),
        )
        values = [
            {
                "value": v.get("value"),
                "studies_count": v.get("studiesCount"),
            }
            for v in top_values[:limit]
        ]

        return {
            "data": {
                "field": field_obj.get("piece"),
                "field_path": field_obj.get("field"),
                "field_type": field_obj.get("type"),
                "unique_values_count": field_obj.get("uniqueValuesCount"),
                "missing_studies_count": field_obj.get("missingStudiesCount"),
                "values": values,
            },
            "metadata": {
                "source": "ClinicalTrials.gov",
                "api_version": "v2",
            },
        }