# Source code for tooluniverse.default_config

"""Default tool configuration files mapping.

Separated from __init__.py to avoid circular imports.
"""

import os
import json
from pathlib import Path

# Absolute path of the directory containing this module (not the process
# working directory); it is the first component of every os.path.join call
# that builds a tool configuration path in default_tool_files.
current_dir = os.path.split(os.path.abspath(__file__))[0]

default_tool_files = {
    "special_tools": os.path.join(current_dir, "data", "special_tools.json"),
    "tool_finder": os.path.join(current_dir, "data", "finder_tools.json"),
    # 'tool_finder_llm': os.path.join(current_dir, 'data', 'tool_finder_llm_config.json'),
    "opentarget": os.path.join(current_dir, "data", "opentarget_tools.json"),
    "fda_drug_label": os.path.join(current_dir, "data", "fda_drug_labeling_tools.json"),
    "monarch": os.path.join(current_dir, "data", "monarch_tools.json"),
    "clinical_trials": os.path.join(
        current_dir, "data", "clinicaltrials_gov_tools.json"
    ),
    "fda_drug_adverse_event": os.path.join(
        current_dir, "data", "fda_drug_adverse_event_tools.json"
    ),
    "fda_drug_adverse_event_detail": os.path.join(
        current_dir, "data", "fda_drug_adverse_event_detail_tools.json"
    ),
    "ChEMBL": os.path.join(current_dir, "data", "chembl_tools.json"),
    "EuropePMC": os.path.join(current_dir, "data", "europe_pmc_tools.json"),
    "semantic_scholar": os.path.join(
        current_dir, "data", "semantic_scholar_tools.json"
    ),
    "pubtator": os.path.join(current_dir, "data", "pubtator_tools.json"),
    "EFO": os.path.join(current_dir, "data", "efo_tools.json"),
    "Enrichr": os.path.join(current_dir, "data", "enrichr_tools.json"),
    "HumanBase": os.path.join(current_dir, "data", "humanbase_tools.json"),
    "OpenAlex": os.path.join(current_dir, "data", "openalex_tools.json"),
    # Literature search tools
    "literature_search": os.path.join(
        current_dir, "data", "literature_search_tools.json"
    ),
    "arxiv": os.path.join(current_dir, "data", "arxiv_tools.json"),
    "crossref": os.path.join(current_dir, "data", "crossref_tools.json"),
    "simbad": os.path.join(current_dir, "data", "simbad_tools.json"),
    "dblp": os.path.join(current_dir, "data", "dblp_tools.json"),
    "pubmed": os.path.join(current_dir, "data", "pubmed_tools.json"),
    "ncbi_nucleotide": os.path.join(current_dir, "data", "ncbi_nucleotide_tools.json"),
    "ncbi_sra": os.path.join(current_dir, "data", "ncbi_sra_tools.json"),
    "doaj": os.path.join(current_dir, "data", "doaj_tools.json"),
    "unpaywall": os.path.join(current_dir, "data", "unpaywall_tools.json"),
    "biorxiv": os.path.join(current_dir, "data", "biorxiv_tools.json"),
    "medrxiv": os.path.join(current_dir, "data", "medrxiv_tools.json"),
    "hal": os.path.join(current_dir, "data", "hal_tools.json"),
    "core": os.path.join(current_dir, "data", "core_tools.json"),
    "pmc": os.path.join(current_dir, "data", "pmc_tools.json"),
    "zenodo": os.path.join(current_dir, "data", "zenodo_tools.json"),
    "openaire": os.path.join(current_dir, "data", "openaire_tools.json"),
    "osf_preprints": os.path.join(current_dir, "data", "osf_preprints_tools.json"),
    "fatcat": os.path.join(current_dir, "data", "fatcat_tools.json"),
    "wikidata_sparql": os.path.join(current_dir, "data", "wikidata_sparql_tools.json"),
    "wikipedia": os.path.join(current_dir, "data", "wikipedia_tools.json"),
    "dbpedia": os.path.join(current_dir, "data", "dbpedia_tools.json"),
    "agents": os.path.join(current_dir, "data", "agentic_tools.json"),
    # Smolagents tool wrapper configs
    "smolagents": os.path.join(current_dir, "data", "smolagent_tools.json"),
    "tool_discovery_agents": os.path.join(
        current_dir, "data", "tool_discovery_agents.json"
    ),
    "web_search_tools": os.path.join(current_dir, "data", "web_search_tools.json"),
    "package_discovery_tools": os.path.join(
        current_dir, "data", "package_discovery_tools.json"
    ),
    "pypi_package_inspector_tools": os.path.join(
        current_dir, "data", "pypi_package_inspector_tools.json"
    ),
    "drug_discovery_agents": os.path.join(
        current_dir, "data", "drug_discovery_agents.json"
    ),
    "dataset": os.path.join(current_dir, "data", "dataset_tools.json"),
    # 'mcp_clients': os.path.join(current_dir, 'data', 'mcp_client_tools_example.json'),
    "mcp_auto_loader_txagent": os.path.join(
        current_dir, "data", "txagent_client_tools.json"
    ),
    "mcp_auto_loader_expert_feedback": os.path.join(
        current_dir, "data", "expert_feedback_tools.json"
    ),
    "adverse_event": os.path.join(current_dir, "data", "adverse_event_tools.json"),
    "dailymed": os.path.join(current_dir, "data", "dailymed_tools.json"),
    "fda_orange_book": os.path.join(current_dir, "data", "fda_orange_book_tools.json"),
    "faers_analytics": os.path.join(current_dir, "data", "faers_analytics_tools.json"),
    "cdc": os.path.join(current_dir, "data", "cdc_tools.json"),
    "nhanes": os.path.join(current_dir, "data", "nhanes_tools.json"),
    "health_disparities": os.path.join(
        current_dir, "data", "health_disparities_tools.json"
    ),
    "hpa": os.path.join(current_dir, "data", "hpa_tools.json"),
    "reactome": os.path.join(current_dir, "data", "reactome_tools.json"),
    "pubchem": os.path.join(current_dir, "data", "pubchem_tools.json"),
    "medlineplus": os.path.join(current_dir, "data", "medlineplus_tools.json"),
    "rxnorm": os.path.join(current_dir, "data", "rxnorm_tools.json"),
    "loinc": os.path.join(current_dir, "data", "loinc_tools.json"),
    "uniprot": os.path.join(current_dir, "data", "uniprot_tools.json"),
    "cellosaurus": os.path.join(current_dir, "data", "cellosaurus_tools.json"),
    # 'software': os.path.join(current_dir, 'data', 'software_tools.json'),
    # Package tools - categorized software tools
    "software_bioinformatics": os.path.join(
        current_dir, "data", "packages", "bioinformatics_core_tools.json"
    ),
    "software_genomics": os.path.join(
        current_dir, "data", "packages", "genomics_tools.json"
    ),
    "software_single_cell": os.path.join(
        current_dir, "data", "packages", "single_cell_tools.json"
    ),
    "software_structural_biology": os.path.join(
        current_dir, "data", "packages", "structural_biology_tools.json"
    ),
    "software_cheminformatics": os.path.join(
        current_dir, "data", "packages", "cheminformatics_tools.json"
    ),
    "software_machine_learning": os.path.join(
        current_dir, "data", "packages", "machine_learning_tools.json"
    ),
    "software_visualization": os.path.join(
        current_dir, "data", "packages", "visualization_tools.json"
    ),
    # Scientific visualization tools
    "visualization_protein_3d": os.path.join(
        current_dir, "data", "protein_structure_3d_tools.json"
    ),
    "visualization_molecule_2d": os.path.join(
        current_dir, "data", "molecule_2d_tools.json"
    ),
    # New database tools
    "interpro": os.path.join(current_dir, "data", "interpro_tools.json"),
    "ebi_search": os.path.join(current_dir, "data", "ebi_search_tools.json"),
    "intact": os.path.join(current_dir, "data", "intact_tools.json"),
    "metabolights": os.path.join(current_dir, "data", "metabolights_tools.json"),
    "proteins_api": os.path.join(current_dir, "data", "proteins_api_tools.json"),
    "arrayexpress": os.path.join(current_dir, "data", "arrayexpress_tools.json"),
    "biostudies": os.path.join(current_dir, "data", "biostudies_tools.json"),
    "dbfetch": os.path.join(current_dir, "data", "dbfetch_tools.json"),
    "pdbe_api": os.path.join(current_dir, "data", "pdbe_api_tools.json"),
    "ena_browser": os.path.join(current_dir, "data", "ena_browser_tools.json"),
    "blast": os.path.join(current_dir, "data", "blast_tools.json"),
    "cbioportal": os.path.join(current_dir, "data", "cbioportal_tools.json"),
    "regulomedb": os.path.join(current_dir, "data", "regulomedb_tools.json"),
    "jaspar": os.path.join(current_dir, "data", "jaspar_tools.json"),
    "remap": os.path.join(current_dir, "data", "remap_tools.json"),
    "screen": os.path.join(current_dir, "data", "screen_tools.json"),
    "pride": os.path.join(current_dir, "data", "pride_tools.json"),
    "emdb": os.path.join(current_dir, "data", "emdb_tools.json"),
    "sasbdb": os.path.join(current_dir, "data", "sasbdb_tools.json"),
    "gtopdb": os.path.join(current_dir, "data", "gtopdb_tools.json"),
    "mpd": os.path.join(current_dir, "data", "mpd_tools.json"),
    "worms": os.path.join(current_dir, "data", "worms_tools.json"),
    "paleobiology": os.path.join(current_dir, "data", "paleobiology_tools.json"),
    "visualization_molecule_3d": os.path.join(
        current_dir, "data", "molecule_3d_tools.json"
    ),
    "software_scientific_computing": os.path.join(
        current_dir, "data", "packages", "scientific_computing_tools.json"
    ),
    "software_physics_astronomy": os.path.join(
        current_dir, "data", "packages", "physics_astronomy_tools.json"
    ),
    "software_earth_sciences": os.path.join(
        current_dir, "data", "packages", "earth_sciences_tools.json"
    ),
    "software_image_processing": os.path.join(
        current_dir, "data", "packages", "image_processing_tools.json"
    ),
    "software_neuroscience": os.path.join(
        current_dir, "data", "packages", "neuroscience_tools.json"
    ),
    "go": os.path.join(current_dir, "data", "gene_ontology_tools.json"),
    "compose": os.path.join(current_dir, "data", "compose_tools.json"),
    "python_executor": os.path.join(current_dir, "data", "python_executor_tools.json"),
    "idmap": os.path.join(current_dir, "data", "idmap_tools.json"),
    "disease_target_score": os.path.join(
        current_dir, "data", "disease_target_score_tools.json"
    ),
    "mcp_auto_loader_uspto_downloader": os.path.join(
        current_dir, "data", "uspto_downloader_tools.json"
    ),
    "uspto": os.path.join(current_dir, "data", "uspto_tools.json"),
    "xml": os.path.join(current_dir, "data", "xml_tools.json"),
    "mcp_auto_loader_boltz": os.path.join(
        current_dir, "data", "boltz_mcp_loader_tools.json"
    ),
    "mcp_auto_loader_esm": os.path.join(
        current_dir, "data", "mcp_auto_loader_esm.json"
    ),
    "url": os.path.join(current_dir, "data", "url_fetch_tools.json"),
    "file_download": os.path.join(current_dir, "data", "file_download_tools.json"),
    # 'langchain': os.path.join(current_dir, 'data', 'langchain_tools.json'),
    "rcsb_pdb": os.path.join(current_dir, "data", "rcsb_pdb_tools.json"),
    "rcsb_search": os.path.join(current_dir, "data", "rcsb_search_tools.json"),
    "tool_composition": os.path.join(
        current_dir, "data", "tool_composition_tools.json"
    ),
    "embedding": os.path.join(current_dir, "data", "embedding_tools.json"),
    "gwas": os.path.join(current_dir, "data", "gwas_tools.json"),
    "admetai": os.path.join(current_dir, "data", "admetai_tools.json"),
    # duplicate key removed
    "alphafold": os.path.join(current_dir, "data", "alphafold_tools.json"),
    "output_summarization": os.path.join(
        current_dir, "data", "output_summarization_tools.json"
    ),
    "odphp": os.path.join(current_dir, "data", "odphp_tools.json"),
    "who_gho": os.path.join(current_dir, "data", "who_gho_tools.json"),
    # Marine Regions - VLIZ geographic authority file for oceans, seas, and marine regions worldwide
    "marine_regions": os.path.join(current_dir, "data", "marine_regions_tools.json"),
    # ERDDAP - NOAA CoastWatch ocean/atmospheric dataset search and metadata (SST, chlorophyll, currents)
    "erddap": os.path.join(current_dir, "data", "erddap_tools.json"),
    # MET Norway - Norwegian Meteorological Institute weather forecasts (global, no auth)
    "metnorway": os.path.join(current_dir, "data", "metnorway_tools.json"),
    "umls": os.path.join(current_dir, "data", "umls_tools.json"),
    "icd": os.path.join(current_dir, "data", "icd_tools.json"),
    "euhealth": os.path.join(current_dir, "data", "euhealth_tools.json"),
    "markitdown": os.path.join(current_dir, "data", "markitdown_tools.json"),
    # Guideline and health policy tools
    "guidelines": os.path.join(current_dir, "data", "unified_guideline_tools.json"),
    # Clinical guidelines - MAGICapp, NCI R4R, NCI Drug Dict extended, DailyMed drug classes
    "clinical_guidelines": os.path.join(
        current_dir, "data", "clinical_guidelines_tools.json"
    ),
    # FDA drug labels - official prescribing information with clinical recommendations
    "openfda_labels": os.path.join(current_dir, "data", "openfda_label_tools.json"),
    # Database tools
    "kegg": os.path.join(current_dir, "data", "kegg_tools.json"),
    "ensembl": os.path.join(current_dir, "data", "ensembl_tools.json"),
    "clinvar": os.path.join(current_dir, "data", "clinvar_tools.json"),
    "geo": os.path.join(current_dir, "data", "geo_tools.json"),
    "dbsnp": os.path.join(current_dir, "data", "dbsnp_tools.json"),
    "gnomad": os.path.join(current_dir, "data", "gnomad_tools.json"),
    # Newly added database tools
    "gbif": os.path.join(current_dir, "data", "gbif_tools.json"),
    "obis": os.path.join(current_dir, "data", "obis_tools.json"),
    "wikipathways": os.path.join(current_dir, "data", "wikipathways_tools.json"),
    "rnacentral": os.path.join(current_dir, "data", "rnacentral_tools.json"),
    "mirna": os.path.join(current_dir, "data", "mirna_tools.json"),
    "lncrna": os.path.join(current_dir, "data", "lncrna_tools.json"),
    "encode": os.path.join(current_dir, "data", "encode_tools.json"),
    "gtex": os.path.join(current_dir, "data", "gtex_tools.json"),
    "mgnify": os.path.join(current_dir, "data", "mgnify_tools.json"),
    "gdc": os.path.join(current_dir, "data", "gdc_tools.json"),
    # Ontology tools
    "ols": os.path.join(current_dir, "data", "ols_tools.json"),
    "optimizer": os.path.join(current_dir, "data", "optimizer_tools.json"),
    # Compact mode core tools
    "compact_mode": os.path.join(current_dir, "data", "compact_mode_tools.json"),
    # New Life Science Tools
    "hca_tools": os.path.join(current_dir, "data", "hca_tools.json"),
    "iedb_tools": os.path.join(current_dir, "data", "iedb_tools.json"),
    "pathway_commons_tools": os.path.join(
        current_dir, "data", "pathway_commons_tools.json"
    ),
    "biomodels_tools": os.path.join(current_dir, "data", "biomodels_tools.json"),
    # BioThings APIs (MyGene, MyVariant, MyChem)
    "biothings": os.path.join(current_dir, "data", "biothings_tools.json"),
    # FDA Pharmacogenomic Biomarkers
    "fda_pharmacogenomic_biomarkers": os.path.join(
        current_dir, "data", "fda_pharmacogenomic_biomarkers_tools.json"
    ),
    # Metabolomics Workbench
    "metabolomics_workbench": os.path.join(
        current_dir, "data", "metabolomics_workbench_tools.json"
    ),
    # PharmGKB - Pharmacogenomics
    "pharmgkb": os.path.join(current_dir, "data", "pharmgkb_tools.json"),
    # NOTE: DisGeNET (Gene-Disease Associations) is registered further below under "disgenet".
    # DGIdb - Drug Gene Interactions
    "dgidb": os.path.join(current_dir, "data", "dgidb_tools.json"),
    # STITCH - Chemical-Protein Interactions
    "stitch": os.path.join(current_dir, "data", "stitch_tools.json"),
    # CIViC - Clinical Interpretation of Variants in Cancer
    "civic": os.path.join(current_dir, "data", "civic_tools.json"),
    # Single-cell RNA-seq data
    "cellxgene_census": os.path.join(
        current_dir, "data", "cellxgene_census_tools.json"
    ),
    # Chromatin and epigenetics data
    "chipatlas": os.path.join(current_dir, "data", "chipatlas_tools.json"),
    # 4DN Data Portal - 3D genome organization
    "fourdn": os.path.join(current_dir, "data", "fourdn_tools.json"),
    # GTEx Portal API V2 - Tissue-specific gene expression and eQTLs
    "gtex_v2": os.path.join(current_dir, "data", "gtex_v2_tools.json"),
    # Rfam Database API - RNA families (v15.1, January 2026)
    "rfam": os.path.join(current_dir, "data", "rfam_tools.json"),
    # BiGG Models API - Genome-scale metabolic models
    "bigg_models": os.path.join(current_dir, "data", "bigg_models_tools.json"),
    # Protein-Protein Interaction (PPI) tools - STRING and BioGRID
    "ppi": os.path.join(current_dir, "data", "ppi_tools.json"),
    # BioGRID - Genetic and Protein Interactions, Chemical-Protein, PTMs
    "biogrid": os.path.join(current_dir, "data", "biogrid_tools.json"),
    # NVIDIA NIM Healthcare APIs - Structure prediction, molecular docking, genomics
    "nvidia_nim": os.path.join(current_dir, "data", "nvidia_nim_tools.json"),
    # COSMIC - Catalogue of Somatic Mutations in Cancer
    "cosmic": os.path.join(current_dir, "data", "cosmic_tools.json"),
    # OncoKB - Precision Oncology Knowledge Base
    "oncokb": os.path.join(current_dir, "data", "oncokb_tools.json"),
    # OMIM - Online Mendelian Inheritance in Man
    "omim": os.path.join(current_dir, "data", "omim_tools.json"),
    # Orphanet - Rare Disease Encyclopedia
    "orphanet": os.path.join(current_dir, "data", "orphanet_tools.json"),
    # DisGeNET - Gene-Disease Associations
    "disgenet": os.path.join(current_dir, "data", "disgenet_tools.json"),
    # BindingDB - Protein-Ligand Binding Affinities
    "bindingdb": os.path.join(current_dir, "data", "bindingdb_tools.json"),
    # GPCRdb - G Protein-Coupled Receptor Database
    "gpcrdb": os.path.join(current_dir, "data", "gpcrdb_tools.json"),
    # BRENDA - Enzyme Kinetics Database
    "brenda": os.path.join(current_dir, "data", "brenda_tools.json"),
    # SAbDab - Structural Antibody Database
    "sabdab": os.path.join(current_dir, "data", "sabdab_tools.json"),
    # IMGT - International ImMunoGeneTics Information System
    "imgt": os.path.join(current_dir, "data", "imgt_tools.json"),
    # HMDB - Human Metabolome Database
    "hmdb": os.path.join(current_dir, "data", "hmdb_tools.json"),
    # MetaCyc - Metabolic Pathway Database
    "metacyc": os.path.join(current_dir, "data", "metacyc_tools.json"),
    # ZINC - Virtual Screening Library
    "zinc": os.path.join(current_dir, "data", "zinc_tools.json"),
    # Enamine - Make-on-Demand Compounds
    "enamine": os.path.join(current_dir, "data", "enamine_tools.json"),
    # eMolecules - Vendor Aggregator
    "emolecules": os.path.join(current_dir, "data", "emolecules_tools.json"),
    # Pharos/TCRD - NIH IDG Understudied Proteins Database
    "pharos": os.path.join(current_dir, "data", "pharos_tools.json"),
    # AlphaMissense - DeepMind Pathogenicity Predictions
    "alphamissense": os.path.join(current_dir, "data", "alphamissense_tools.json"),
    # CADD - Combined Annotation Dependent Depletion
    "cadd": os.path.join(current_dir, "data", "cadd_tools.json"),
    # DepMap - Cancer Dependency Map (Sanger Cell Model Passports)
    "depmap": os.path.join(current_dir, "data", "depmap_tools.json"),
    # InterProScan - Protein Domain/Family Prediction
    "interproscan": os.path.join(current_dir, "data", "interproscan_tools.json"),
    # EVE - Evolutionary Variant Effect Predictions
    "eve": os.path.join(current_dir, "data", "eve_tools.json"),
    # Thera-SAbDab - Therapeutic Structural Antibody Database
    "therasabdab": os.path.join(current_dir, "data", "therasabdab_tools.json"),
    # DeepGO - Protein Function Prediction
    "deepgo": os.path.join(current_dir, "data", "deepgo_tools.json"),
    # ClinGen - Gene-Disease Validity, Dosage Sensitivity, Actionability
    "clingen": os.path.join(current_dir, "data", "clingen_tools.json"),
    # SpliceAI - Deep Learning Splice Prediction
    "spliceai": os.path.join(current_dir, "data", "spliceai_tools.json"),
    # IMPC - International Mouse Phenotyping Consortium (mouse KO phenotypes)
    "impc": os.path.join(current_dir, "data", "impc_tools.json"),
    # Complex Portal - Curated protein complexes (includes CORUM mammalian complexes)
    "complex_portal": os.path.join(current_dir, "data", "complex_portal_tools.json"),
    # Expression Atlas - EBI GXA baseline + differential gene expression
    "expression_atlas": os.path.join(
        current_dir, "data", "expression_atlas_tools.json"
    ),
    # ProteinsPlus - Protein-ligand docking and binding site analysis
    "proteinsplus": os.path.join(current_dir, "data", "proteinsplus_tools.json"),
    # SwissDock - Molecular docking with AutoDock Vina and Attracting Cavities
    "swissdock": os.path.join(current_dir, "data", "swissdock_tools.json"),
    # LIPID MAPS - Lipid Structure Database (lipidomics)
    "lipidmaps": os.path.join(current_dir, "data", "lipidmaps_tools.json"),
    # USDA FoodData Central - Food composition and nutrient database
    "fooddata_central": os.path.join(
        current_dir, "data", "fooddata_central_tools.json"
    ),
    # CTD - Comparative Toxicogenomics Database (chemical-gene-disease interactions)
    "ctd": os.path.join(current_dir, "data", "ctd_tools.json"),
    # NeuroMorpho - Neuronal morphology database (neuron reconstructions, morphometrics)
    "neuromorpho": os.path.join(current_dir, "data", "neuromorpho_tools.json"),
    # Allen Brain Atlas - Brain gene expression and structure data
    "allen_brain": os.path.join(current_dir, "data", "allen_brain_tools.json"),
    # GlyGen - Glycoinformatics (glycan structures, glycoproteins, glycosylation sites)
    "glygen": os.path.join(current_dir, "data", "glygen_tools.json"),
    # MGnify Expanded - Metagenomics genome catalog, biomes, study details
    "mgnify_expanded": os.path.join(current_dir, "data", "mgnify_expanded_tools.json"),
    # SGD - Saccharomyces Genome Database (yeast genes, phenotypes, interactions)
    "sgd": os.path.join(current_dir, "data", "sgd_tools.json"),
    # NCBI Datasets API v2 - Gene info, orthologs, taxonomy, genome metadata
    "ncbi_datasets": os.path.join(current_dir, "data", "ncbi_datasets_tools.json"),
    # EBI Taxonomy - Taxonomic classification, lineage, name resolution
    "ebi_taxonomy": os.path.join(current_dir, "data", "ebi_taxonomy_tools.json"),
    # Alliance of Genome Resources - Cross-species gene data from 7 model organisms
    "alliance_genome": os.path.join(current_dir, "data", "alliance_genome_tools.json"),
    # Open Targets Genetics - GWAS variant annotation, credible sets, L2G predictions
    "opentarget_genetics": os.path.join(
        current_dir, "data", "opentarget_genetics_tools.json"
    ),
    # HGNC - HUGO Gene Nomenclature Committee (authoritative human gene naming)
    "hgnc": os.path.join(current_dir, "data", "hgnc_tools.json"),
    # BV-BRC - Bacterial and Viral Bioinformatics Resource Center (pathogen genomics, AMR)
    "bvbrc": os.path.join(current_dir, "data", "bvbrc_tools.json"),
    # BioImage Archive - EBI biological imaging data (microscopy, cryo-EM, fluorescence)
    "bioimage_archive": os.path.join(
        current_dir, "data", "bioimage_archive_tools.json"
    ),
    # Plant Reactome - Gramene plant metabolic and regulatory pathways (140+ species)
    "plant_reactome": os.path.join(current_dir, "data", "plant_reactome_tools.json"),
    # Ensembl VEP - Variant Effect Predictor (HGVS, rsID annotation, variant recoding)
    "ensembl_vep": os.path.join(current_dir, "data", "ensembl_vep_tools.json"),
    # ITIS - Integrated Taxonomic Information System (US taxonomy, hierarchy, common names)
    "itis": os.path.join(current_dir, "data", "itis_tools.json"),
    # QuickGO - EBI Gene Ontology annotation browser (annotations, term details, hierarchy)
    "quickgo": os.path.join(current_dir, "data", "quickgo_tools.json"),
    # Bgee - Comparative gene expression across 29+ animal species (RNA-Seq, Affymetrix, EST)
    "bgee": os.path.join(current_dir, "data", "bgee_tools.json"),
    # OMA - Orthologous MAtrix Browser (orthology across 2,600+ genomes, HOGs, OMA Groups)
    "oma": os.path.join(current_dir, "data", "oma_tools.json"),
    # CATH - Protein Structure Classification (Class, Architecture, Topology, Homologous superfamily)
    "cath": os.path.join(current_dir, "data", "cath_tools.json"),
    # MeSH - Medical Subject Headings (NLM controlled vocabulary for PubMed indexing)
    "mesh": os.path.join(current_dir, "data", "mesh_tools.json"),
    # HPO - Human Phenotype Ontology (phenotype terms, hierarchy, clinical genetics)
    "hpo": os.path.join(current_dir, "data", "hpo_tools.json"),
    # Reactome Analysis Service - Pathway enrichment/overrepresentation analysis
    "reactome_analysis": os.path.join(
        current_dir, "data", "reactome_analysis_tools.json"
    ),
    # Rhea - Expert-curated biochemical reactions (SIB, linked to ChEBI and EC)
    "rhea": os.path.join(current_dir, "data", "rhea_tools.json"),
    # PubChem BioAssay - Biological screening data (drug discovery, toxicology)
    "pubchem_bioassay": os.path.join(
        current_dir, "data", "pubchem_bioassay_tools.json"
    ),
    # ENA Portal API - European Nucleotide Archive search (studies, samples, sequences)
    "ena_portal": os.path.join(current_dir, "data", "ena_portal_tools.json"),
    # PomBase - Fission yeast (S. pombe) genome database (gene info, phenotypes, domains)
    "pombase": os.path.join(current_dir, "data", "pombase_tools.json"),
    # EBI BioSamples - Biological sample metadata hub (60M+ samples, cross-archive)
    "biosamples": os.path.join(current_dir, "data", "biosamples_tools.json"),
    # GNPS - Mass spectrometry spectral library (metabolomics, natural products)
    "gnps": os.path.join(current_dir, "data", "gnps_tools.json"),
    # WormBase - C. elegans genome database (gene info, phenotypes, expression)
    "wormbase": os.path.join(current_dir, "data", "wormbase_tools.json"),
    # SWISS-MODEL Repository - Pre-computed protein homology models (ExPASy/SIB)
    "swissmodel": os.path.join(current_dir, "data", "swissmodel_tools.json"),
    # ProteomeXchange - Proteomics data consortium (PRIDE, MassIVE, jPOST)
    "proteomexchange": os.path.join(current_dir, "data", "proteomexchange_tools.json"),
    # PDBe Search - PDB structure search via EBI Solr (full-text, compounds, organisms)
    "pdbe_search": os.path.join(current_dir, "data", "pdbe_search_tools.json"),
    # Nextstrain - Pathogen phylogenetics and molecular epidemiology tracking
    "nextstrain": os.path.join(current_dir, "data", "nextstrain_tools.json"),
    # UCSC Genome Browser - Genome sequences, gene search, annotation tracks (220+ genomes)
    "ucsc_genome": os.path.join(current_dir, "data", "ucsc_genome_tools.json"),
    # ChEBI - Chemical Entities of Biological Interest (EBI chemical ontology, 195K+ compounds)
    "chebi": os.path.join(current_dir, "data", "chebi_tools.json"),
    # UniChem - EBI unified chemical cross-referencing across 40+ databases
    "unichem": os.path.join(current_dir, "data", "unichem_tools.json"),
    # PANTHER - Protein classification, gene enrichment, and ortholog analysis (144 organisms)
    "panther": os.path.join(current_dir, "data", "panther_tools.json"),
    # Ensembl LD - Linkage disequilibrium from 1000 Genomes (population genetics)
    "ensembl_ld": os.path.join(current_dir, "data", "ensembl_ld_tools.json"),
    # Ensembl Regulation - TF binding motifs, constrained elements, binding matrices
    "ensembl_regulation": os.path.join(
        current_dir, "data", "ensembl_regulation_tools.json"
    ),
    # Ensembl Phenotypes - Gene/region/variant phenotype associations (GWAS, ClinVar, OMIM)
    "ensembl_phenotype": os.path.join(
        current_dir, "data", "ensembl_phenotype_tools.json"
    ),
    # Europe PMC Annotations - Text-mined entities from articles (chemicals, organisms, GO)
    "europepmc_annotations": os.path.join(
        current_dir, "data", "europepmc_annotations_tools.json"
    ),
    # WFGY ProblemMap - LLM/RAG failure triage prompt bundle (local, no API call)
    "wfgy_promptbundle": os.path.join(
        current_dir, "data", "wfgy_promptbundle_tools.json"
    ),
    # UniProt ID Mapping - Cross-database identifier conversion (100+ databases)
    "uniprot_idmapping": os.path.join(
        current_dir, "data", "uniprot_idmapping_tools.json"
    ),
    # Open Tree of Life - Phylogenetic tree of life (name resolution, taxonomy, MRCA, subtrees)
    "opentree": os.path.join(current_dir, "data", "opentree_tools.json"),
    # iNaturalist - Citizen science biodiversity observations (taxa, observations, species counts)
    "inaturalist": os.path.join(current_dir, "data", "inaturalist_tools.json"),
    # NCI Thesaurus - National Cancer Institute terminology (cancer diseases, drugs, genes)
    "nci_thesaurus": os.path.join(current_dir, "data", "nci_thesaurus_tools.json"),
    # ClinGen Allele Registry - Standardized allele IDs (HGVS normalization, cross-references)
    "clingen_ar": os.path.join(current_dir, "data", "clingen_ar_tools.json"),
    # NDEx - Network Data Exchange (biological network repository, PPI, signaling, regulatory networks)
    "ndex": os.path.join(current_dir, "data", "ndex_tools.json"),
    # Gene Ontology API - GO term details, gene functional annotations, gene-function associations
    "go_api": os.path.join(current_dir, "data", "go_api_tools.json"),
    # Ensembl Compara - Comparative genomics (orthologues, paralogues, gene trees)
    "ensembl_compara": os.path.join(current_dir, "data", "ensembl_compara_tools.json"),
    # Monarch Initiative V3 - Cross-species gene-disease-phenotype associations
    "monarch_v3": os.path.join(current_dir, "data", "monarch_v3_tools.json"),
    # EBI Proteins API Extended - Mutagenesis experiments and PTM proteomics evidence
    "ebi_proteins_ext": os.path.join(
        current_dir, "data", "ebi_proteins_ext_tools.json"
    ),
    # PDBe-KB Graph API - Aggregated structural knowledge base (ligand sites, PPI interfaces, stats)
    "pdbe_kb": os.path.join(current_dir, "data", "pdbe_kb_tools.json"),
    # UniProt Reference Datasets - Diseases, keywords, and proteomes controlled vocabularies
    "uniprot_ref": os.path.join(current_dir, "data", "uniprot_ref_tools.json"),
    # Disease Ontology - Standardized human disease classification (DO terms, hierarchy, cross-refs)
    "disease_ontology": os.path.join(
        current_dir, "data", "disease_ontology_tools.json"
    ),
    # RCSB PDB Data API - Direct REST access to PDB entry details, assemblies, non-polymer entities
    "rcsb_data": os.path.join(current_dir, "data", "rcsb_data_tools.json"),
    # EBI Proteins Features - Domain/site annotations, molecule processing, secondary structure
    "ebi_proteins_features": os.path.join(
        current_dir, "data", "ebi_proteins_features_tools.json"
    ),
    # InterPro Extended - Reverse lookup: find proteins containing a specific domain
    "interpro_ext": os.path.join(current_dir, "data", "interpro_ext_tools.json"),
    # STRING Extended - Per-protein functional annotations (GO, KEGG, disease, tissue)
    "string_ext": os.path.join(current_dir, "data", "string_ext_tools.json"),
    # Ensembl Info - Genome assembly metadata and species catalog
    "ensembl_info": os.path.join(current_dir, "data", "ensembl_info_tools.json"),
    # Epigenomics - Histone marks, DNA methylation, chromatin accessibility, regulatory elements
    "epigenomics": os.path.join(current_dir, "data", "epigenomics_tools.json"),
    # 3D Beacons - Aggregated 3D structure models from PDBe, AlphaFold, SWISS-MODEL, PED
    "three_d_beacons": os.path.join(current_dir, "data", "three_d_beacons_tools.json"),
    # Reactome Content Service - Pathway search, contained events, enhanced details
    "reactome_content": os.path.join(
        current_dir, "data", "reactome_content_tools.json"
    ),
    # InterPro Entry - Protein-to-domain mappings and keyword-based entry search
    "interpro_entry": os.path.join(current_dir, "data", "interpro_entry_tools.json"),
    # Ensembl Sequence - Region DNA and ID-based protein/cDNA sequence retrieval
    "ensembl_sequence": os.path.join(
        current_dir, "data", "ensembl_sequence_tools.json"
    ),
    # MyDisease.info - BioThings disease annotation aggregator (MONDO, DO, CTD, HPO, DisGeNET)
    "mydisease": os.path.join(current_dir, "data", "mydisease_tools.json"),
    # EBI OxO - Ontology cross-reference mappings across biomedical databases
    "oxo": os.path.join(current_dir, "data", "oxo_tools.json"),
    # InterPro Domain Architecture - Protein domain positions, structure mapping, clan members
    "interpro_domain_arch": os.path.join(
        current_dir, "data", "interpro_domain_arch_tools.json"
    ),
    # WikiPathways Extended - Gene lists from pathways and gene-to-pathway lookups
    "wikipathways_ext": os.path.join(
        current_dir, "data", "wikipathways_ext_tools.json"
    ),
    # EBI Gene Expression Atlas (GxA) - Baseline/differential gene expression experiments
    "gxa": os.path.join(current_dir, "data", "gxa_tools.json"),
    # CellxGene Discovery - Single-cell RNA-seq dataset/collection browsing
    "cellxgene_discovery": os.path.join(
        current_dir, "data", "cellxgene_discovery_tools.json"
    ),
    # Ensembl Archive - Stable ID versioning and history tracking
    "ensembl_archive": os.path.join(current_dir, "data", "ensembl_archive_tools.json"),
    # KEGG Extended - Gene-pathway links, pathway gene lists, compound details
    "kegg_ext": os.path.join(current_dir, "data", "kegg_ext_tools.json"),
    # EOL - Encyclopedia of Life (biodiversity knowledge aggregator: species, taxonomy, media)
    "eol": os.path.join(current_dir, "data", "eol_tools.json"),
    # Ensembl Map - Coordinate system conversion and assembly mapping
    "ensembl_map": os.path.join(current_dir, "data", "ensembl_map_tools.json"),
    # Ensembl Overlap - Features overlapping a genomic region (genes, variants, regulatory)
    "ensembl_overlap": os.path.join(current_dir, "data", "ensembl_overlap_tools.json"),
    # Ensembl Xrefs - External database cross-references for genes and proteins
    "ensembl_xrefs": os.path.join(current_dir, "data", "ensembl_xrefs_tools.json"),
    # Ensembl Variation Extended - Population genetics, linkage disequilibrium, haplotypes
    "ensembl_variation_ext": os.path.join(
        current_dir, "data", "ensembl_variation_ext_tools.json"
    ),
    # EBI Proteins Coordinates - Protein 3D structural coordinates
    "ebi_proteins_coordinates": os.path.join(
        current_dir, "data", "ebi_proteins_coordinates_tools.json"
    ),
    # EBI Proteins Epitope - Immunological epitope annotations
    "ebi_proteins_epitope": os.path.join(
        current_dir, "data", "ebi_proteins_epitope_tools.json"
    ),
    # EBI Proteins Interactions - Protein-protein interaction evidence
    "ebi_proteins_interactions": os.path.join(
        current_dir, "data", "ebi_proteins_interactions_tools.json"
    ),
    # PDBe Compound - Small molecule compound summaries and cross-references
    "pdbe_compound": os.path.join(current_dir, "data", "pdbe_compound_tools.json"),
    # PDBe Ligands - Structure-level ligand lists and residue details
    "pdbe_ligands": os.path.join(current_dir, "data", "pdbe_ligands_tools.json"),
    # PDBe SIFTS - Structure-to-sequence mappings (UniProt, Pfam, CATH, EC)
    "pdbe_sifts": os.path.join(current_dir, "data", "pdbe_sifts_tools.json"),
    # PDBe Validation - Experimental validation reports (R-factor, clashscore, geometry)
    "pdbe_validation": os.path.join(current_dir, "data", "pdbe_validation_tools.json"),
    # RCSB Advanced Search - Complex multi-attribute PDB queries
    "rcsb_advanced_search": os.path.join(
        current_dir, "data", "rcsb_advanced_search_tools.json"
    ),
    # RCSB GraphQL - Flexible PDB data retrieval via GraphQL schema
    "rcsb_graphql": os.path.join(current_dir, "data", "rcsb_graphql_tools.json"),
    # Reactome Interactors - Protein interaction data from IntAct/ChEMBL
    "reactome_interactors": os.path.join(
        current_dir, "data", "reactome_interactors_tools.json"
    ),
    # UniParc - UniProt Archive cross-references across sequence databases
    "uniparc": os.path.join(current_dir, "data", "uniparc_tools.json"),
    # UniProt Locations - Subcellular location controlled vocabulary
    "uniprot_locations": os.path.join(
        current_dir, "data", "uniprot_locations_tools.json"
    ),
    # UniProt Taxonomy - Taxonomy nodes and lineage data from UniProt
    "uniprot_taxonomy": os.path.join(
        current_dir, "data", "uniprot_taxonomy_tools.json"
    ),
    # UniRef - UniProt Reference Clusters (100/90/50 identity clusters)
    "uniref": os.path.join(current_dir, "data", "uniref_tools.json"),
    # ClinGen Dosage Sensitivity - Haploinsufficiency and triplosensitivity scores
    "clingen_dosage": os.path.join(
        current_dir, "data", "clingen_dosage_api_tools.json"
    ),
    # Dfam - Repetitive DNA element database (transposons, SINEs, LINEs)
    "dfam": os.path.join(current_dir, "data", "dfam_tools.json"),
    # DisProt - Intrinsically disordered protein regions database
    "disprot": os.path.join(current_dir, "data", "disprot_tools.json"),
    # Genome Nexus - Cancer variant annotation aggregator (VEP, COSMIC, ClinVar)
    "genome_nexus": os.path.join(current_dir, "data", "genome_nexus_tools.json"),
    # g:Profiler - Functional enrichment, gene ID conversion, ortholog mapping
    "gprofiler": os.path.join(current_dir, "data", "gprofiler_tools.json"),
    # Harmonizome - Aggregated gene-attribute associations from 114 datasets
    "harmonizome": os.path.join(current_dir, "data", "harmonizome_tools.json"),
    # MobiDB - Intrinsic disorder and mobility annotations for proteins
    "mobidb": os.path.join(current_dir, "data", "mobidb_tools.json"),
    # OmniPath - Signaling network (ligand-receptor, enzyme-substrate, complexes)
    "omnipath": os.path.join(current_dir, "data", "omnipath_tools.json"),
    # OrthoDB - Hierarchical orthology database (orthologs, paralogs across 1,300+ species)
    "orthodb": os.path.join(current_dir, "data", "orthodb_tools.json"),
    # SynBioHub - Synthetic biology parts and designs repository (SBOL standard)
    "synbiohub": os.path.join(current_dir, "data", "synbiohub_tools.json"),
    # BioPortal - NCBO ontology browser and annotation service
    "bioportal": os.path.join(current_dir, "data", "bioportal_tools.json"),
    # FlyBase - Drosophila melanogaster genetics (via Alliance of Genome Resources)
    "flybase": os.path.join(current_dir, "data", "flybase_tools.json"),
    # ZFIN - Zebrafish Information Network (via Alliance of Genome Resources)
    "zfin": os.path.join(current_dir, "data", "zfin_tools.json"),
    # Pfam - Protein families database (via InterPro API)
    "pfam": os.path.join(current_dir, "data", "pfam_tools.json"),
    # PubChem Toxicity - Chemical toxicity, GHS hazard, carcinogen classification, LD50 data
    "pubchem_tox": os.path.join(current_dir, "data", "pubchem_tox_tools.json"),
    # ClinicalTrials.gov - World's largest clinical trial registry (572,000+ trials)
    # EpiGraphDB - Mendelian Randomization, genetic correlations, drug repurposing via GWAS
    "epigraphdb": os.path.join(current_dir, "data", "epigraphdb_tools.json"),
    # Bio.tools - ELIXIR bioinformatics tool/software registry (30,000+ entries)
    "biotools_registry": os.path.join(
        current_dir, "data", "biotools_registry_tools.json"
    ),
    # Identifiers.org - ELIXIR biological identifier resolution service (800+ namespaces)
    "identifiers_org": os.path.join(current_dir, "data", "identifiers_org_tools.json"),
    # Europe PMC Citations - Citation network traversal (who cites / is cited by)
    "europepmc_citations": os.path.join(
        current_dir, "data", "europepmc_citations_tools.json"
    ),
    # TCIA - The Cancer Imaging Archive (medical imaging datasets)
    "tcia": os.path.join(current_dir, "data", "tcia_tools.json"),
    # OpenNeuro - Neuroimaging data repository (BIDS datasets)
    "openneuro": os.path.join(current_dir, "data", "openneuro_tools.json"),
    # ModelDB - Computational neuroscience model repository (Yale/SenseLab)
    "modeldb": os.path.join(current_dir, "data", "modeldb_tools.json"),
    # KEGG BRITE - Hierarchical functional classification (enzymes, kinases, transporters, GPCRs)
    "kegg_brite": os.path.join(current_dir, "data", "kegg_brite_tools.json"),
    # OmicsDI - Omics Discovery Index (integrated multi-omics repository search)
    "omicsdi": os.path.join(current_dir, "data", "omicsdi_tools.json"),
    # CPIC - Clinical Pharmacogenomics Implementation Consortium
    "cpic": os.path.join(current_dir, "data", "cpic_tools.json"),
    # PDB-REDO - Re-refined PDB structures with improved quality metrics
    "pdb_redo": os.path.join(current_dir, "data", "pdb_redo_tools.json"),
    # BMRB - Biological Magnetic Resonance Data Bank (NMR data for proteins and metabolites)
    "bmrb": os.path.join(current_dir, "data", "bmrb_tools.json"),
    # PharmVar - Pharmacogene Variation Consortium (star allele definitions)
    "pharmvar": os.path.join(current_dir, "data", "pharmvar_tools.json"),
    # Catalogue of Life - Global species index (2M+ species from 165+ databases)
    "col": os.path.join(current_dir, "data", "col_tools.json"),
    # MassBank Europe - Open-access MS spectral library for metabolomics and environmental chemistry
    "massbank": os.path.join(current_dir, "data", "massbank_tools.json"),
    # LOTUS - Natural products database (750K+ structure-organism pairs)
    "lotus": os.path.join(current_dir, "data", "lotus_tools.json"),
    # MSigDB - Molecular Signatures Database (33K+ gene sets for GSEA)
    "msigdb": os.path.join(current_dir, "data", "msigdb_tools.json"),
    # HumanMine - InterMine data warehouse for human/mouse/rat genomics
    "humanmine": os.path.join(current_dir, "data", "humanmine_tools.json"),
    # VariantValidator - HGVS variant validation and nomenclature conversion
    "variant_validator": os.path.join(
        current_dir, "data", "variant_validator_tools.json"
    ),
    # IDR - Image Data Resource, public imaging datasets from published studies
    "idr": os.path.join(current_dir, "data", "idr_tools.json"),
    # OpenFDA - FDA drug labels, adverse events, and NDC directory
    "openfda": os.path.join(current_dir, "data", "openfda_tools.json"),
    # KLIFS - Kinase-Ligand Interaction Fingerprints and Structures
    "klifs": os.path.join(current_dir, "data", "klifs_tools.json"),
    # GeneNetwork - systems genetics QTL and gene expression for genetic crosses
    "genenetwork": os.path.join(current_dir, "data", "genenetwork_tools.json"),
    # ChannelsDB - protein channel, tunnel, and pore data for PDB structures
    "channelsdb": os.path.join(current_dir, "data", "channelsdb_tools.json"),
    # FlyMine - InterMine data warehouse for Drosophila melanogaster genomics
    "flymine": os.path.join(current_dir, "data", "flymine_tools.json"),
    # MouseMine - InterMine data warehouse for mouse genomics from MGI
    "mousemine": os.path.join(current_dir, "data", "mousemine_tools.json"),
    # TargetMine - InterMine data warehouse for drug target discovery
    "targetmine": os.path.join(current_dir, "data", "targetmine_tools.json"),
    # iCite - NIH citation metrics, RCR, APT scores for PubMed publications
    "icite": os.path.join(current_dir, "data", "icite_tools.json"),
    # scite - smart citation tallies (supporting/contradicting/mentioning)
    "scite": os.path.join(current_dir, "data", "scite_tools.json"),
    # VEuPathDB - eukaryotic pathogen, vector and host genomics
    "veupathdb": os.path.join(current_dir, "data", "veupathdb_tools.json"),
    # GeneNetwork Extended - trait and dataset detail info
    "genenetwork_ext": os.path.join(current_dir, "data", "genenetwork_ext_tools.json"),
    # Open Food Facts - commercial food products with barcodes, Nutri-Score, NOVA, ingredients
    "openfoodfacts": os.path.join(current_dir, "data", "openfoodfacts_tools.json"),
    # MIBiG - Minimum Information about a Biosynthetic Gene Cluster (natural product BGCs)
    "mibig": os.path.join(current_dir, "data", "mibig_tools.json"),
    # ScanProsite - Protein motif scanning against PROSITE patterns (ExPASy/SIB)
    "scanprosite": os.path.join(current_dir, "data", "scanprosite_tools.json"),
    # PDBe Graph API - Bound molecules, UniProt mappings, compound details, FunPDBe
    "pdbe_graph": os.path.join(current_dir, "data", "pdbe_graph_tools.json"),
    # NCBI Gene - E-utilities gene search and summary (Entrez Gene)
    "ncbi_gene": os.path.join(current_dir, "data", "ncbi_gene_tools.json"),
    # DataCite - research data DOIs for datasets, software, samples across repositories
    "datacite": os.path.join(current_dir, "data", "datacite_tools.json"),
    # Figshare - open-access research repository for datasets, figures, code, posters
    "figshare": os.path.join(current_dir, "data", "figshare_tools.json"),
    # Human Protein Atlas - protein expression across tissues, subcellular location, disease, cancer
    # FPbase - fluorescent protein database with spectral properties, sequences, structures
    "fpbase": os.path.join(current_dir, "data", "fpbase_tools.json"),
    # ROR - Research Organization Registry for institution identifiers and metadata
    "ror": os.path.join(current_dir, "data", "ror_tools.json"),
    # ORCID - researcher identifiers, profiles, and publication lists
    "orcid": os.path.join(current_dir, "data", "orcid_tools.json"),
    # PanelApp - Genomics England gene panels for clinical genetic testing
    "panelapp": os.path.join(current_dir, "data", "panelapp_tools.json"),
    # Semantic Scholar Extended - paper details, author profiles, recommendations
    "semantic_scholar_ext": os.path.join(
        current_dir, "data", "semantic_scholar_ext_tools.json"
    ),
    # bioRxiv Extended - list recent preprints by date range
    "biorxiv_ext": os.path.join(current_dir, "data", "biorxiv_ext_tools.json"),
    # World Bank - World Development Indicators (GDP, population, health, education, 200+ countries)
    "worldbank": os.path.join(current_dir, "data", "worldbank_tools.json"),
    # IMF - World Economic Outlook macroeconomic data (GDP growth, inflation, unemployment, debt)
    # Open-Meteo - Free weather forecast, historical climate, air quality, and geocoding
    "open_meteo": os.path.join(current_dir, "data", "open_meteo_tools.json"),
    # EVA - European Variation Archive (EBI) for population variant data
    "eva": os.path.join(current_dir, "data", "eva_tools.json"),
    # eQTL Catalogue - Expression quantitative trait loci associations
    "eqtl": os.path.join(current_dir, "data", "eqtl_tools.json"),
    # OSDR - NASA Open Science Data Repository (space biology studies)
    "osdr": os.path.join(current_dir, "data", "osdr_tools.json"),
    # Gene2Phenotype - EBI curated gene-disease associations for clinical genetics
    "gene2phenotype": os.path.join(current_dir, "data", "gene2phenotype_tools.json"),
    # NASA Exoplanet Archive - ADQL queries for 5500+ confirmed exoplanets and stellar hosts
    "nasa_exoplanet": os.path.join(current_dir, "data", "nasa_exoplanet_tools.json"),
    # OpenStreetMap Nominatim - Free geocoding and reverse geocoding worldwide
    "nominatim": os.path.join(current_dir, "data", "nominatim_tools.json"),
    # REST Countries - Comprehensive country metadata (population, languages, currencies, borders)
    # eBird - Cornell Lab bird taxonomy and regional species lists (no API key)
    "ebird_taxonomy": os.path.join(current_dir, "data", "ebird_taxonomy_tools.json"),
    # CRAN R Package Database - Metadata for 20,000+ R packages including versions and dependencies
    "cran": os.path.join(current_dir, "data", "cran_tools.json"),
    # NASA CMR - Common Metadata Repository for 40,000+ Earth observation datasets
    "nasa_cmr": os.path.join(current_dir, "data", "nasa_cmr_tools.json"),
    # DataONE - Federation of 43+ environmental data repositories (3.2M+ datasets)
    "dataone": os.path.join(current_dir, "data", "dataone_tools.json"),
    # Dryad - Open research data repository for life sciences and other disciplines
    "dryad": os.path.join(current_dir, "data", "dryad_tools.json"),
    # Dataverse (Harvard) - Open-source research data repository platform
    "dataverse": os.path.join(current_dir, "data", "dataverse_tools.json"),
    # SDSS - Sloan Digital Sky Survey DR18, SQL queries for 500M+ astronomical objects
    "sdss": os.path.join(current_dir, "data", "sdss_tools.json"),
    # NASA NED - NASA/IPAC Extragalactic Database for galaxies, quasars, and AGN
    "nasa_ned": os.path.join(current_dir, "data", "nasa_ned_tools.json"),
    # GitHub - Public repository search and metadata via GitHub API
    "github": os.path.join(current_dir, "data", "github_tools.json"),
    # LitVar2 - NCBI variant-literature linking (search variants, get publications)
    "litvar": os.path.join(current_dir, "data", "litvar_tools.json"),
    # PubTator3 Extended - entity annotation extraction from PubMed articles
    "pubtator3_ext": os.path.join(current_dir, "data", "pubtator3_ext_tools.json"),
    # RCSB Chemical Components - PDB ligand/small molecule chemical info
    "rcsb_chemcomp": os.path.join(current_dir, "data", "rcsb_chemcomp_tools.json"),
    # NCI Drug Dictionary - cancer drug definitions, aliases, and NCI concept IDs
    "nci_drugdict": os.path.join(current_dir, "data", "nci_drugdict_tools.json"),
    # Eurostat - EU statistical office data (GDP, population, health, environment)
    "eurostat": os.path.join(current_dir, "data", "eurostat_tools.json"),
    # USGS Earthquake - Real-time and historical earthquake data from USGS FDSN
    "usgs_earthquake": os.path.join(current_dir, "data", "usgs_earthquake_tools.json"),
    # JPL Horizons - Solar system body lookup and physical data from NASA JPL
    "jpl_horizons": os.path.join(current_dir, "data", "jpl_horizons_tools.json"),
    # NASA SBDB - Small Body Database for asteroids and comets (1.3M+ objects)
    "nasa_sbdb": os.path.join(current_dir, "data", "nasa_sbdb_tools.json"),
    # Space - ISS position/crew tracker and sunrise/sunset times (Open Notify API)
    # COD - Crystallography Open Database for 500K+ crystal structures
    "cod_crystal": os.path.join(current_dir, "data", "cod_crystal_tools.json"),
    # HuggingFace Hub - ML model/dataset search and metadata (500K+ models)
    "huggingface": os.path.join(current_dir, "data", "huggingface_tools.json"),
    # OpenML - Open machine learning benchmark datasets and tasks
    "openml": os.path.join(current_dir, "data", "openml_tools.json"),
    # Metropolitan Museum of Art - 400K+ open-access artworks (search and object detail)
    # Victoria and Albert Museum - 5000 years of art and design (search and object detail)
    # Europeana - 50M+ European cultural heritage items (museums, libraries, archives)
    # Exchange Rate - live currency exchange rates for 150+ currencies (no auth)
    # Crates.io - Rust package registry (150K+ crates with search and details)
    # Internet Archive - Digital library of 40M+ items (books, audio, video, web, software)
    # Anaconda.org - Conda package registry (conda-forge, bioconda, 200K+ packages)
    "anaconda": os.path.join(current_dir, "data", "anaconda_tools.json"),
    # NASA EONET - Natural event tracker (wildfires, storms, volcanoes, floods)
    "nasa_eonet": os.path.join(current_dir, "data", "nasa_eonet_tools.json"),
    # POWO - Plants of the World Online by Kew Gardens (1.3M+ plant names)
    "powo": os.path.join(current_dir, "data", "powo_tools.json"),
    # NeuroVault - Neuroimaging statistical maps repository (16K+ collections, 650K+ images)
    "neurovault": os.path.join(current_dir, "data", "neurovault_tools.json"),
    # Disease.sh - COVID-19 and public health statistics (231 countries, historical data)
    "diseasesh": os.path.join(current_dir, "data", "diseasesh_tools.json"),
    # OpenCitations COCI - Open scholarly citation index (references, citations, counts)
    "opencitations": os.path.join(current_dir, "data", "opencitations_tools.json"),
    # Wikidata Entity API - search and retrieve Wikidata items/entities by ID
    "wikidata_entity": os.path.join(current_dir, "data", "wikidata_entity_tools.json"),
    # iDigBio - Integrated Digitized Biocollections (130M+ natural history specimens)
    "idigbio": os.path.join(current_dir, "data", "idigbio_tools.json"),
    # ELIXIR TeSS - Bioinformatics training materials and events aggregator
    "elixir_tess": os.path.join(current_dir, "data", "elixir_tess_tools.json"),
    # Wikimedia Stats - Wikipedia page views and top articles analytics
    # Art Institute of Chicago - 130K+ artworks open access collection
    # Cleveland Museum of Art - 61K+ open access artworks
    # Open Notify - ISS real-time position and astronauts in space
    # CEDA - UK Centre for Environmental Data Analysis climate datasets
    "ceda": os.path.join(current_dir, "data", "ceda_tools.json"),
    # Sunrise-Sunset API - solar event times for any location
    "sunrise_sunset": os.path.join(current_dir, "data", "sunrise_sunset_tools.json"),
    # Openverse - Creative Commons licensed images (700M+ from Flickr, Wikimedia, museums)
    # US College Scorecard - higher education data (6000+ schools, admission rates, costs)
    # FEC - US Federal Election Commission candidate and financial data
    # Smithsonian Open Access - 5M+ digitized museum objects from 19 Smithsonian institutions
    # Library of Congress - 21M+ digitized historical items (photos, maps, manuscripts)
    # SoilGrids - global soil property predictions at any location (ISRIC)
    "soilgrids": os.path.join(current_dir, "data", "soilgrids_tools.json"),
    # US Treasury Fiscal Data - national debt, exchange rates, interest rates, debt breakdown
    # Chronicling America - historic US newspaper search (LOC, 1777-1963)
    # GBIF Extended - species detail by key and species name autocomplete
    "gbif_ext": os.path.join(current_dir, "data", "gbif_ext_tools.json"),
    # Frankfurter - real-time and historical currency exchange rates (ECB data)
    # Datamuse - word-finding API (synonyms, antonyms, rhymes, semantic similarity)
    # National Weather Service (NWS) - US weather forecasts, alerts, and point metadata
    "nws": os.path.join(current_dir, "data", "nws_tools.json"),
    # SpaceX - rocket launches, rockets, launchpads, and crew data
    # USGS Water Services - real-time streamflow, water level, and temperature data
    "usgs_water": os.path.join(current_dir, "data", "usgs_water_tools.json"),
    # Spaceflight News API - 30K+ space news articles from major sites
    # Launch Library 2 - upcoming rocket launches worldwide (all providers)
    # US Census Bureau - population and demographic data (no key required)
    "uscensus": os.path.join(current_dir, "data", "uscensus_tools.json"),
    # Open-Meteo Marine - ocean wave/swell forecasts for any coastal location
    "open_meteo_marine": os.path.join(
        current_dir, "data", "open_meteo_marine_tools.json"
    ),
    # Open-Meteo Flood - river discharge and flood forecasts (GloFAS/Copernicus)
    "open_meteo_flood": os.path.join(
        current_dir, "data", "open_meteo_flood_tools.json"
    ),
    # NASA DONKI - space weather events (CME, flares, storms, particles, shocks)
    "nasa_donki": os.path.join(current_dir, "data", "nasa_donki_tools.json"),
    # OpenTopoData - terrain elevation data for any global location (SRTM/ASTER/NED)
    "opentopodata": os.path.join(current_dir, "data", "opentopodata_tools.json"),
    # Disease.sh - COVID-19 global and country-level statistics
    # NASA NeoWs - Near Earth Object data (asteroids, close approaches)
    "nasa_neows": os.path.join(current_dir, "data", "nasa_neows_tools.json"),
    # REST Countries Extended - country details by name, region, language
    # STRING Network - protein-protein interaction networks
    "string_network": os.path.join(current_dir, "data", "string_network_tools.json"),
    # UniProt Proteomes - proteome reference data
    "uniprot_proteomes": os.path.join(
        current_dir, "data", "uniprot_proteomes_tools.json"
    ),
    # wttr.in - current weather in JSON format for any city or coordinates
    # TimeAPI.io - current time by timezone or geographic coordinates
    # ExchangeRate-API - current foreign exchange rates for 166 currencies
    # BigDataCloud - reverse geocode lat/lng to country, city, region (no auth)
    # Open-Meteo Climate - historical climate data from 1950 via ERA5/CMIP6 models
    "open_meteo_climate": os.path.join(
        current_dir, "data", "open_meteo_climate_tools.json"
    ),
    # Open-Meteo Air Quality - hourly PM2.5/PM10/ozone forecast and history
    "open_meteo_airquality": os.path.join(
        current_dir, "data", "open_meteo_airquality_tools.json"
    ),
    # Open Elevation - terrain elevation data from lat/lon coordinates
    # Disease.sh extended - COVID historical and vaccine coverage data
    "disease_sh_ext": os.path.join(current_dir, "data", "disease_sh_ext_tools.json"),
    # Where the ISS At - real-time ISS position and velocity
    # Wikipedia extended - featured daily content and on-this-day events
    "wikipedia_ext": os.path.join(current_dir, "data", "wikipedia_ext_tools.json"),
    # Data.gov - U.S. government open data catalog search
    # WAQI - World Air Quality Index real-time AQI data
    "waqi": os.path.join(current_dir, "data", "waqi_tools.json"),
    # BLS - Bureau of Labor Statistics economic time series (CPI, unemployment, etc.)
    # SEC EDGAR - SEC filing search and company financial facts (XBRL)
    # InspireHEP - high energy physics literature database
    "inspirehep": os.path.join(current_dir, "data", "inspirehep_tools.json"),
    # Federal Register - US government regulations, rules, notices, presidential documents
    # NASA TechPort - NASA technology development projects and investments
    # Crates.io - Rust package registry (search crates, version history, downloads)
    # MyMemory Translation - free machine translation for 200+ language pairs
    # IETF Datatracker - Internet standards (RFCs, Internet-Drafts, protocol specs)
    # Gutendex - Project Gutenberg ebooks catalog search
    # Bioconductor - R/Bioconductor bioinformatics package search and metadata (via R-universe)
    "bioconductor": os.path.join(current_dir, "data", "bioconductor_tools.json"),
    # ArtIC - Art Institute of Chicago open-access artwork search and metadata
    "artic": os.path.join(current_dir, "data", "artic_tools.json"),
    # ADA/AHA/ACC/NCCN - Clinical society guidelines (diabetes, cardiology, oncology)
    "ada_aha_nccn": os.path.join(current_dir, "data", "ada_aha_nccn_tools.json"),
    # CLUE.io - L1000 Connectivity Map perturbation signatures
    "clue": os.path.join(current_dir, "data", "clue_tools.json"),
    # TIMER2.0 - Tumor immune estimation and gene-immune correlations
    "timer": os.path.join(current_dir, "data", "timer_tools.json"),
    # PROTAC-DB - PROTAC compound database
    "protacdb": os.path.join(current_dir, "data", "protacdb_tools.json"),
    # DNA Design Tools - Local restriction site, ORF, GC content, translation
    "dna_tools": os.path.join(current_dir, "data", "dna_tools.json"),
    # Drug Synergy - Bliss, HSA, ZIP synergy models (local computation)
    "drug_synergy": os.path.join(current_dir, "data", "drug_synergy_tools.json"),
    # Dose-Response Analysis - 4PL curve fitting and IC50 calculation (local)
    "dose_response": os.path.join(current_dir, "data", "dose_response_tools.json"),
    # Survival Analysis - Kaplan-Meier, log-rank test, Cox regression (local)
    "survival": os.path.join(current_dir, "data", "survival_tools.json"),
    # ChemCompute - Local computational chemistry tools (RDKit SA Score)
    "chem_compute": os.path.join(current_dir, "data", "chem_compute_tools.json"),
    # L1000FWD - L1000 Fireworks Connectivity Map signature search
    "l1000fwd": os.path.join(current_dir, "data", "l1000fwd_tools.json"),
    # Cell Painting - IDR high-content microscopy screens, plates, and well-level data
    "cellpainting": os.path.join(current_dir, "data", "cellpainting_tools.json"),
}

# Auto-load any user-provided tool files from ~/.tooluniverse/data/user_tools/.
# Every "<name>.json" file found there is registered in default_tool_files
# under the key "user_<name>", mapped to the file's full path.
user_tools_dir = os.path.expanduser("~/.tooluniverse/data/user_tools")

# isdir (rather than exists): a stray regular file at this path is ignored
# instead of making os.listdir() raise while this module is being imported.
if os.path.isdir(user_tools_dir):
    # Sorted so that registration order does not depend on the arbitrary
    # order in which the filesystem happens to list the directory.
    for filename in sorted(os.listdir(user_tools_dir)):
        if filename.endswith(".json"):
            # Strip only the trailing extension; str.replace() would also
            # remove any ".json" that appears earlier in the file name.
            key = "user_" + filename[: -len(".json")]
            default_tool_files[key] = os.path.join(user_tools_dir, filename)


def _get_hook_config_file_path():
    """
    Locate the hook_config.json template file.

    The "tooluniverse.template" package is looked up through the
    importlib resources API first (the ``importlib_resources`` backport is
    imported when the standard-library module is unavailable). If that lookup
    fails for any reason, the path is derived from this module's own location
    instead. According to the original note, this is meant to mirror
    HookManager._get_config_file_path() so both resolve the same file.

    Returns
        Path: Location of hook_config.json. From the resources API this is
        whatever path-like object ``files()`` yields; from the fallback it is
        a ``pathlib.Path``.
    """
    try:
        import importlib.resources as resources_api
    except ImportError:
        import importlib_resources as resources_api

    try:
        template_pkg = resources_api.files("tooluniverse.template")
        resolved = template_pkg / "hook_config.json"
    except Exception:
        # Package lookup failed (e.g. package not importable): fall back to
        # the "template" directory that sits next to this module.
        module_dir = Path(__file__).parent
        resolved = module_dir / "template" / "hook_config.json"
    return resolved



[docs]
def get_default_hook_config():
    """
    Return the default hook configuration, preferring hook_config.json.

    The template file located by _get_hook_config_file_path() is read as
    UTF-8 text and parsed as JSON. Any exception raised while locating,
    reading or parsing the file is swallowed on purpose, and a built-in
    minimal configuration is returned instead, so callers always get a
    usable result.

    Returns
        dict: The parsed JSON content of hook_config.json (expected to be a
        dict), or the built-in minimal configuration when the file cannot
        be loaded.
    """
    try:
        location = _get_hook_config_file_path()
        # Anything lacking read_text() is treated as a plain filesystem path.
        if not hasattr(location, "read_text"):
            location = Path(location)
        return json.loads(location.read_text(encoding="utf-8"))
    except Exception:
        # Deliberate best-effort: a missing or corrupted config file must not
        # stop the system, so fall through to the minimal configuration.
        pass

    # Values that appear both in the SummarizationHook defaults and in the
    # single default hook entry built below.
    length_threshold = 5000
    chunk_size = 32000
    focus_areas = "key_findings_and_results"
    max_summary_length = 3000

    global_settings = {
        "default_timeout": 30,
        "max_hook_depth": 3,
        "enable_hook_caching": True,
        "hook_execution_order": "priority_desc",
    }

    summarization_defaults = {
        "default_output_length_threshold": length_threshold,
        "default_chunk_size": chunk_size,
        "default_focus_areas": focus_areas,
        "default_max_summary_length": max_summary_length,
    }

    file_save_defaults = {
        "default_temp_dir": None,
        "default_file_prefix": "tool_output",
        "default_include_metadata": True,
        "default_auto_cleanup": False,
        "default_cleanup_age_hours": 24,
    }

    summarization_hook = {
        "name": "default_summarization_hook",
        "type": "SummarizationHook",
        "enabled": True,
        "priority": 1,
        "conditions": {
            "output_length": {"operator": ">", "threshold": length_threshold}
        },
        "hook_config": {
            "composer_tool": "OutputSummarizationComposer",
            "chunk_size": chunk_size,
            "focus_areas": focus_areas,
            "max_summary_length": max_summary_length,
        },
    }

    return {
        "global_settings": global_settings,
        "exclude_tools": ["Tool_RAG", "ToolFinderEmbedding", "ToolFinderLLM"],
        "hook_type_defaults": {
            "SummarizationHook": summarization_defaults,
            "FileSaveHook": file_save_defaults,
        },
        "hooks": [summarization_hook],
        "tool_specific_hooks": {},
        "category_hooks": {},
    }