Source code for tooluniverse.swissmodel_tool

# swissmodel_tool.py
"""
SWISS-MODEL Repository REST API tool for ToolUniverse.

SWISS-MODEL Repository is a database of annotated 3D protein structure
models generated by the SWISS-MODEL homology-modelling pipeline.
It provides pre-computed models for UniProt entries.

API: https://swissmodel.expasy.org/repository
No authentication required. Free for academic/research use.
"""

from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

SWISSMODEL_BASE_URL = "https://swissmodel.expasy.org/repository"


@register_tool("SwissModelTool")
class SwissModelTool(BaseTool):
    """
    Tool for querying the SWISS-MODEL Repository.

    Provides pre-computed protein homology models for UniProt entries,
    including template structures, sequence coverage, quality metrics,
    and download coordinates.

    The endpoint served by an instance is selected by
    ``tool_config["fields"]["endpoint_type"]`` and is one of
    ``get_models`` (the default), ``get_model_summary``, ``download_pdb``
    or ``get_models_batch``. Results are plain dicts whose ``status`` key
    is either ``"success"`` or ``"error"``.

    No authentication required.
    """
def __init__(self, tool_config: Dict[str, Any]):
    """Initialise the tool from its configuration mapping.

    Args:
        tool_config: Tool configuration. Two optional keys are read
            here: ``timeout`` (per-request timeout in seconds, default
            30) and ``fields.endpoint_type`` (the endpoint ``run``
            dispatches to, default ``"get_models"``).
    """
    super().__init__(tool_config)
    self.timeout = tool_config.get("timeout", 30)
    # ``or {}`` also covers an explicit ``"fields": null`` in the config;
    # ``.get("fields", {})`` would hand back None and crash on ``.get``.
    fields = tool_config.get("fields") or {}
    self.endpoint_type = fields.get("endpoint_type", "get_models")
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run the configured SWISS-MODEL query and report failures as data.

    Delegates to ``_dispatch``. Timeouts, connection failures, HTTP error
    statuses and any other ``Exception`` are turned into a
    ``{"status": "error", "error": <message>}`` dict instead of
    propagating to the caller.
    """
    try:
        return self._dispatch(arguments)
    except requests.exceptions.Timeout:
        message = f"SWISS-MODEL API request timed out after {self.timeout} seconds"
    except requests.exceptions.ConnectionError:
        message = "Failed to connect to SWISS-MODEL API. Check network connectivity."
    except requests.exceptions.HTTPError as http_err:
        message = f"SWISS-MODEL API HTTP error: {http_err.response.status_code}"
    except Exception as exc:
        message = f"Unexpected error querying SWISS-MODEL: {str(exc)}"
    # Only reached when one of the handlers above picked a message.
    return {"status": "error", "error": message}
[docs] def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Route to appropriate endpoint based on config.""" if self.endpoint_type == "get_models": return self._get_models(arguments) elif self.endpoint_type == "get_model_summary": return self._get_model_summary(arguments) elif self.endpoint_type == "download_pdb": return self._download_pdb(arguments) elif self.endpoint_type == "get_models_batch": return self._get_models_batch(arguments) else: return { "status": "error", "error": f"Unknown endpoint_type: {self.endpoint_type}", }
[docs] @staticmethod def _flatten_structure(s: Dict[str, Any]) -> Dict[str, Any]: """Flatten a SWISS-MODEL Repository structure record into a model dict.""" model = { "template": s.get("template", ""), "method": s.get("method", ""), "coverage": s.get("coverage"), "from_residue": s.get("from"), "to_residue": s.get("to"), "created_date": s.get("created_date", ""), "provider": s.get("provider", ""), "coordinates_url": s.get("coordinates", ""), } # Quality metrics qmean = s.get("qmean") if isinstance(qmean, dict): model["qmean_global"] = qmean.get("qmean4_global_score") model["qmean_z_score"] = qmean.get("qmean_z_score") # Complex partners if any complex_with = s.get("in_complex_with", {}) if complex_with: partners = [] for chain_id, chain_proteins in complex_with.items(): if isinstance(chain_proteins, list): for p in chain_proteins: if isinstance(p, dict): partners.append( { "chain": chain_id, "uniprot_ac": p.get("uniprot_ac", ""), "description": p.get("description", ""), } ) if partners: model["complex_partners"] = partners[:10] return model
[docs] def _get_models(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get available homology models for a UniProt accession. Supports optional residue-range, provider, and template filtering via the documented SWISS-MODEL Repository query parameters. """ uniprot_id = arguments.get("uniprot_id", "") if not uniprot_id: return { "status": "error", "error": "uniprot_id parameter is required (e.g., 'P04637' for human p53)", } # Optional documented query-string filters params: Dict[str, Any] = {} residue_range = arguments.get("range") if residue_range: params["range"] = residue_range provider = arguments.get("provider") if provider: params["provider"] = provider template = arguments.get("template") if template: params["template"] = template url = f"{SWISSMODEL_BASE_URL}/uniprot/{uniprot_id}.json" response = requests.get(url, params=params or None, timeout=self.timeout) response.raise_for_status() raw = response.json() result_data = raw.get("result", {}) models = [self._flatten_structure(s) for s in result_data.get("structures", [])] result = { "uniprot_id": uniprot_id, "sequence_length": result_data.get("sequence_length"), "crc64": result_data.get("crc64"), "model_count": len(models), "models": models[:30], } # Echo applied filters so the agent knows the result is scoped if params: result["filters_applied"] = { k: v for k, v in params.items() if v is not None } return { "status": "success", "data": result, "metadata": { "source": "SWISS-MODEL Repository", "api_version": raw.get("api_version", ""), "query": raw.get("query", uniprot_id), "endpoint": "get_models", }, }
[docs] def _download_pdb(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Download SWISS-MODEL homology-model 3D coordinates (PDB format). Fetches the actual atomic coordinates (ATOM/HETATM records) for a UniProt accession, with optional sort/provider/template/range selectors. """ uniprot_id = arguments.get("uniprot_id", "") if not uniprot_id: return { "status": "error", "error": "uniprot_id parameter is required (e.g., 'P04637' for human p53)", } params: Dict[str, Any] = {} sort = arguments.get("sort") if sort: params["sort"] = sort provider = arguments.get("provider") if provider: params["provider"] = provider template = arguments.get("template") if template: params["template"] = template residue_range = arguments.get("range") if residue_range: params["range"] = residue_range url = f"{SWISSMODEL_BASE_URL}/uniprot/{uniprot_id}.pdb" response = requests.get(url, params=params or None, timeout=self.timeout) response.raise_for_status() pdb_text = response.text atom_count = sum( 1 for line in pdb_text.splitlines() if line.startswith("ATOM") or line.startswith("HETATM") ) if atom_count == 0: return { "status": "error", "error": ( f"No atomic coordinates returned for {uniprot_id}" + (f" with filters {params}" if params else "") + ". The accession may have no SWISS-MODEL model matching the filters." ), } data = { "uniprot_id": uniprot_id, "format": "pdb", "pdb_content": pdb_text, "atom_count": atom_count, "size_bytes": len(pdb_text.encode("utf-8")), } if params: data["filters_applied"] = params return { "status": "success", "data": data, "metadata": { "source": "SWISS-MODEL Repository", "query": uniprot_id, "endpoint": "download_pdb", "content_type": response.headers.get("Content-Type", "text/plain"), }, }
[docs] def _get_models_batch(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Batch lookup of SWISS-MODEL models for up to 250 UniProt accessions.""" ids = arguments.get("uniprot_ids") if isinstance(ids, str): ids = [x.strip() for x in ids.replace(",", " ").split() if x.strip()] if not ids: return { "status": "error", "error": ( "uniprot_ids parameter is required: a list (or comma-separated " "string) of up to 250 UniProt accessions, e.g. " "['P04637', 'P00533', 'P38398']." ), } if len(ids) > 250: return { "status": "error", "error": f"Too many accessions ({len(ids)}). SWISS-MODEL batch limit is 250.", } identifiers = ",".join(ids) url = f"{SWISSMODEL_BASE_URL}/uniprot/{identifiers}.json" response = requests.get(url, timeout=self.timeout) response.raise_for_status() raw = response.json() results = [] for entry in raw.get("resultset", []): uniprot_entries = entry.get("uniprot_entries", []) or [] acs = [e.get("ac") for e in uniprot_entries if e.get("ac")] structures = entry.get("structures", []) results.append( { "uniprot_ids": acs, "sequence_length": entry.get("sequence_length"), "crc64": entry.get("crc64"), "model_count": len(structures), "models": [self._flatten_structure(s) for s in structures[:30]], } ) return { "status": "success", "data": { "requested_count": len(ids), "returned_count": len(results), "results": results, }, "metadata": { "source": "SWISS-MODEL Repository", "api_version": raw.get("api_version", ""), "query": identifiers, "endpoint": "get_models_batch", }, }
[docs] def _get_model_summary(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get a summary of the best available model for a UniProt accession.""" uniprot_id = arguments.get("uniprot_id", "") if not uniprot_id: return { "status": "error", "error": "uniprot_id parameter is required (e.g., 'P04637' for human p53)", } url = f"{SWISSMODEL_BASE_URL}/uniprot/{uniprot_id}.json" response = requests.get(url, timeout=self.timeout) response.raise_for_status() raw = response.json() result_data = raw.get("result", {}) structures = result_data.get("structures", []) # Find best model (highest coverage) best = None best_coverage = 0 for s in structures: cov = s.get("coverage", 0) or 0 if cov > best_coverage: best_coverage = cov best = s summary = { "uniprot_id": uniprot_id, "sequence_length": result_data.get("sequence_length"), "total_models": len(structures), } if best: summary["best_model"] = { "template": best.get("template", ""), "method": best.get("method", ""), "coverage": best.get("coverage"), "from_residue": best.get("from"), "to_residue": best.get("to"), "created_date": best.get("created_date", ""), "coordinates_url": best.get("coordinates", ""), } if "qmean" in best and isinstance(best["qmean"], dict): summary["best_model"]["qmean_global"] = best["qmean"].get( "qmean4_global_score" ) else: summary["best_model"] = None # Methods summary methods = {} for s in structures: m = s.get("method", "Unknown") methods[m] = methods.get(m, 0) + 1 summary["methods_distribution"] = methods return { "status": "success", "data": summary, "metadata": { "source": "SWISS-MODEL Repository", "query": uniprot_id, "endpoint": "get_model_summary", }, }