# Source code for tooluniverse.swissmodel_tool
# swissmodel_tool.py
"""
SWISS-MODEL Repository REST API tool for ToolUniverse.
SWISS-MODEL Repository is a database of annotated 3D protein structure
models generated by the SWISS-MODEL homology-modelling pipeline.
It provides pre-computed models for UniProt entries.
API: https://swissmodel.expasy.org/repository
No authentication required. Free for academic/research use.
"""
from collections import Counter
from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL of the SWISS-MODEL Repository; the tool appends
# "/uniprot/<accession>.<ext>" to it when building request URLs.
SWISSMODEL_BASE_URL = "https://swissmodel.expasy.org/repository"
# [docs]  (Sphinx source-link marker left over from the rendered page)
@register_tool("SwissModelTool")
class SwissModelTool(BaseTool):
    """
    Tool for querying the SWISS-MODEL Repository.

    Provides pre-computed protein homology models for UniProt entries,
    including template structures, sequence coverage, quality metrics,
    and download coordinates. No authentication required.

    The ``fields.endpoint_type`` value of the tool configuration selects the
    operation performed by :meth:`run`: ``get_models`` (the default),
    ``get_model_summary``, ``download_pdb`` or ``get_models_batch``.

    Every operation returns a dict with ``"status"`` set to ``"success"``
    (plus ``"data"`` and ``"metadata"``) or ``"error"`` (plus ``"error"``).
    """

    # Per-entry cap on the number of flattened models placed in a response.
    MAX_MODELS_PER_ENTRY = 30
    # Largest number of accessions accepted by a single batch lookup.
    MAX_BATCH_ACCESSIONS = 250
    # Shared message for every endpoint that needs a single accession.
    _MISSING_ACCESSION_ERROR = (
        "uniprot_id parameter is required (e.g., 'P04637' for human p53)"
    )

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout and the endpoint selected by the config.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default 30)
                and ``fields.endpoint_type`` (default ``"get_models"``) are
                read here; the full mapping is passed on to ``BaseTool``.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``fields`` may be absent or explicitly null in a config file.
        fields = tool_config.get("fields") or {}
        self.endpoint_type = fields.get("endpoint_type", "get_models")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the SWISS-MODEL API call.

        Args:
            arguments: Endpoint arguments (``uniprot_id`` / ``uniprot_ids`` and
                optional filters). ``None`` is treated as an empty mapping so
                the caller gets the regular "parameter is required" error.

        Returns:
            The endpoint result, or an ``{"status": "error", ...}`` dict when
            the request times out, cannot connect, returns an HTTP error
            status, or fails in any other way. Exceptions are never propagated.
        """
        try:
            return self._dispatch(arguments or {})
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"SWISS-MODEL API request timed out after {self.timeout} seconds",
            }
        except requests.exceptions.ConnectionError:
            return {
                "status": "error",
                "error": "Failed to connect to SWISS-MODEL API. Check network connectivity.",
            }
        except requests.exceptions.HTTPError as e:
            # An HTTPError built by hand may carry no response object.
            status_code = getattr(e.response, "status_code", "unknown")
            return {
                "status": "error",
                "error": f"SWISS-MODEL API HTTP error: {status_code}",
            }
        except Exception as e:
            # Top-level boundary: report instead of crashing the caller.
            return {
                "status": "error",
                "error": f"Unexpected error querying SWISS-MODEL: {str(e)}",
            }

    def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the handler matching ``self.endpoint_type``.

        Returns an error dict (rather than raising) for an unknown endpoint.
        """
        handlers = {
            "get_models": self._get_models,
            "get_model_summary": self._get_model_summary,
            "download_pdb": self._download_pdb,
            "get_models_batch": self._get_models_batch,
        }
        handler = handlers.get(self.endpoint_type)
        if handler is None:
            return {
                "status": "error",
                "error": f"Unknown endpoint_type: {self.endpoint_type}",
            }
        return handler(arguments)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _read_accession(arguments: Dict[str, Any]) -> str:
        """Return the ``uniprot_id`` argument as a stripped string ('' if unset)."""
        value = arguments.get("uniprot_id", "")
        if not value:
            return ""
        return str(value).strip()

    @staticmethod
    def _collect_filters(arguments: Dict[str, Any], names: tuple) -> Dict[str, Any]:
        """Pick the truthy optional query parameters, in the order of ``names``."""
        return {name: arguments[name] for name in names if arguments.get(name)}

    @staticmethod
    def _entry_url(identifier: str, extension: str) -> str:
        """Build the ``/uniprot/<identifier>.<extension>`` URL.

        The identifier is percent-encoded so characters such as ``/``, ``?``
        or ``#`` in caller-supplied input cannot alter the request path;
        commas are kept because they separate accessions in batch requests.
        """
        return f"{SWISSMODEL_BASE_URL}/uniprot/{quote(identifier, safe=',')}.{extension}"

    def _request(self, url: str, params: Any = None):
        """GET ``url`` with the configured timeout; raise on HTTP error status."""
        response = requests.get(url, params=params or None, timeout=self.timeout)
        response.raise_for_status()
        return response

    @staticmethod
    def _as_dict(value: Any) -> Dict[str, Any]:
        """Return ``value`` if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _structure_records(container: Dict[str, Any]) -> list:
        """Return the dict entries of ``container['structures']`` ([] if absent/null)."""
        records = container.get("structures")
        if not isinstance(records, list):
            return []
        return [record for record in records if isinstance(record, dict)]

    @staticmethod
    def _coverage_of(structure: Dict[str, Any]) -> float:
        """Return the structure's coverage as a float; 0.0 when missing or non-numeric."""
        try:
            return float(structure.get("coverage"))
        except (TypeError, ValueError):
            return 0.0

    @staticmethod
    def _flatten_structure(s: Dict[str, Any], max_partners: int = 10) -> Dict[str, Any]:
        """Flatten a SWISS-MODEL Repository structure record into a model dict.

        Args:
            s: One entry of a ``structures`` list from the repository JSON.
            max_partners: Maximum number of complex partners to report.

        Returns:
            A dict with template, method, coverage, residue range, creation
            date, provider and coordinates URL. ``qmean_global`` and
            ``qmean_z_score`` are added when ``qmean`` is a dict, and
            ``complex_partners`` when ``in_complex_with`` lists any partner.
        """
        model = {
            "template": s.get("template", ""),
            "method": s.get("method", ""),
            "coverage": s.get("coverage"),
            "from_residue": s.get("from"),
            "to_residue": s.get("to"),
            "created_date": s.get("created_date", ""),
            "provider": s.get("provider", ""),
            "coordinates_url": s.get("coordinates", ""),
        }

        # Quality metrics
        qmean = s.get("qmean")
        if isinstance(qmean, dict):
            model["qmean_global"] = qmean.get("qmean4_global_score")
            model["qmean_z_score"] = qmean.get("qmean_z_score")

        # Complex partners, if any. Anything that is not the expected
        # {chain_id: [partner, ...]} mapping is ignored instead of raising.
        complex_with = s.get("in_complex_with")
        if isinstance(complex_with, dict):
            partners = [
                {
                    "chain": chain_id,
                    "uniprot_ac": partner.get("uniprot_ac", ""),
                    "description": partner.get("description", ""),
                }
                for chain_id, chain_proteins in complex_with.items()
                if isinstance(chain_proteins, list)
                for partner in chain_proteins
                if isinstance(partner, dict)
            ]
            if partners:
                model["complex_partners"] = partners[:max_partners]
        return model

    def _normalize_accessions(self, value: Any) -> list:
        """Turn the ``uniprot_ids`` argument into a clean list of accession strings.

        Accepts a comma/whitespace separated string or a list/tuple whose
        items may themselves contain separators. Blank items and ``None`` are
        dropped; any other input type yields an empty list.
        """
        if isinstance(value, str):
            items = [value]
        elif isinstance(value, (list, tuple)):
            items = [str(item) for item in value if item is not None]
        else:
            return []
        accessions = []
        for item in items:
            accessions.extend(item.replace(",", " ").split())
        return accessions

    # ------------------------------------------------------------------
    # Endpoints
    # ------------------------------------------------------------------

    def _get_models(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get available homology models for a UniProt accession.

        Supports optional residue-range, provider, and template filtering via
        the ``range``, ``provider`` and ``template`` query parameters.

        Returns:
            On success, ``data`` holds the accession, sequence length, CRC64,
            the total ``model_count`` and at most ``MAX_MODELS_PER_ENTRY``
            flattened ``models``; ``filters_applied`` echoes any filters sent.

        Raises:
            requests.exceptions.RequestException: on network/HTTP failure
                (translated to an error dict by :meth:`run`).
        """
        uniprot_id = self._read_accession(arguments)
        if not uniprot_id:
            return {"status": "error", "error": self._MISSING_ACCESSION_ERROR}

        params = self._collect_filters(arguments, ("range", "provider", "template"))
        raw = self._as_dict(self._request(self._entry_url(uniprot_id, "json"), params).json())
        # "result" can be null for entries the repository has nothing on.
        result_data = self._as_dict(raw.get("result"))
        structures = self._structure_records(result_data)

        result = {
            "uniprot_id": uniprot_id,
            "sequence_length": result_data.get("sequence_length"),
            "crc64": result_data.get("crc64"),
            # Count reflects everything returned, even if the list is capped.
            "model_count": len(structures),
            "models": [
                self._flatten_structure(s)
                for s in structures[: self.MAX_MODELS_PER_ENTRY]
            ],
        }
        # Echo applied filters so the agent knows the result is scoped
        if params:
            result["filters_applied"] = dict(params)

        return {
            "status": "success",
            "data": result,
            "metadata": {
                "source": "SWISS-MODEL Repository",
                "api_version": raw.get("api_version", ""),
                "query": raw.get("query", uniprot_id),
                "endpoint": "get_models",
            },
        }

    def _download_pdb(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Download SWISS-MODEL homology-model 3D coordinates (PDB format).

        Fetches the PDB text for a UniProt accession, with optional
        ``sort``/``provider``/``template``/``range`` selectors, and counts
        its ATOM/HETATM records. A response without any such record is
        reported as an error because it carries no coordinates.

        Raises:
            requests.exceptions.RequestException: on network/HTTP failure
                (translated to an error dict by :meth:`run`).
        """
        uniprot_id = self._read_accession(arguments)
        if not uniprot_id:
            return {"status": "error", "error": self._MISSING_ACCESSION_ERROR}

        params = self._collect_filters(
            arguments, ("sort", "provider", "template", "range")
        )
        response = self._request(self._entry_url(uniprot_id, "pdb"), params)
        pdb_text = response.text

        atom_count = sum(
            1 for line in pdb_text.splitlines() if line.startswith(("ATOM", "HETATM"))
        )
        if atom_count == 0:
            return {
                "status": "error",
                "error": (
                    f"No atomic coordinates returned for {uniprot_id}"
                    + (f" with filters {params}" if params else "")
                    + ". The accession may have no SWISS-MODEL model matching the filters."
                ),
            }

        data = {
            "uniprot_id": uniprot_id,
            "format": "pdb",
            "pdb_content": pdb_text,
            "atom_count": atom_count,
            "size_bytes": len(pdb_text.encode("utf-8")),
        }
        if params:
            data["filters_applied"] = params

        return {
            "status": "success",
            "data": data,
            "metadata": {
                "source": "SWISS-MODEL Repository",
                "query": uniprot_id,
                "endpoint": "download_pdb",
                "content_type": response.headers.get("Content-Type", "text/plain"),
            },
        }

    def _get_models_batch(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Batch lookup of SWISS-MODEL models for up to 250 UniProt accessions.

        ``uniprot_ids`` may be a list/tuple or a comma/whitespace separated
        string. Each returned entry lists the accessions it covers, its
        sequence length, CRC64, total ``model_count`` and at most
        ``MAX_MODELS_PER_ENTRY`` flattened ``models``.

        Raises:
            requests.exceptions.RequestException: on network/HTTP failure
                (translated to an error dict by :meth:`run`).
        """
        limit = self.MAX_BATCH_ACCESSIONS
        ids = self._normalize_accessions(arguments.get("uniprot_ids"))
        if not ids:
            return {
                "status": "error",
                "error": (
                    "uniprot_ids parameter is required: a list (or comma-separated "
                    f"string) of up to {limit} UniProt accessions, e.g. "
                    "['P04637', 'P00533', 'P38398']."
                ),
            }
        if len(ids) > limit:
            return {
                "status": "error",
                "error": f"Too many accessions ({len(ids)}). SWISS-MODEL batch limit is {limit}.",
            }

        identifiers = ",".join(ids)
        raw = self._as_dict(self._request(self._entry_url(identifiers, "json")).json())

        entries = raw.get("resultset")
        if not isinstance(entries, list):
            # A single accession resolves to the same URL as the single-entry
            # lookup, whose payload is read from "result" rather than
            # "resultset"; accept that shape so one-item batches are not empty.
            single = raw.get("result")
            entries = [single] if isinstance(single, dict) else []

        results = []
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            uniprot_entries = entry.get("uniprot_entries") or []
            acs = [
                e.get("ac")
                for e in uniprot_entries
                if isinstance(e, dict) and e.get("ac")
            ]
            structures = self._structure_records(entry)
            results.append(
                {
                    "uniprot_ids": acs,
                    "sequence_length": entry.get("sequence_length"),
                    "crc64": entry.get("crc64"),
                    "model_count": len(structures),
                    "models": [
                        self._flatten_structure(s)
                        for s in structures[: self.MAX_MODELS_PER_ENTRY]
                    ],
                }
            )

        return {
            "status": "success",
            "data": {
                "requested_count": len(ids),
                "returned_count": len(results),
                "results": results,
            },
            "metadata": {
                "source": "SWISS-MODEL Repository",
                "api_version": raw.get("api_version", ""),
                "query": identifiers,
                "endpoint": "get_models_batch",
            },
        }

    def _get_model_summary(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get a summary of the best available model for a UniProt accession.

        "Best" means highest coverage; the first such model wins ties. When
        the entry has at least one structure a best model is always reported,
        even if no structure carries a usable coverage value. The summary also
        counts models per method in ``methods_distribution``.

        Raises:
            requests.exceptions.RequestException: on network/HTTP failure
                (translated to an error dict by :meth:`run`).
        """
        uniprot_id = self._read_accession(arguments)
        if not uniprot_id:
            return {"status": "error", "error": self._MISSING_ACCESSION_ERROR}

        raw = self._as_dict(self._request(self._entry_url(uniprot_id, "json")).json())
        result_data = self._as_dict(raw.get("result"))
        structures = self._structure_records(result_data)

        # max() keeps the first element among equals, so ties resolve to the
        # earliest structure in the response.
        best = max(structures, key=self._coverage_of) if structures else None

        summary = {
            "uniprot_id": uniprot_id,
            "sequence_length": result_data.get("sequence_length"),
            "total_models": len(structures),
        }

        if best is not None:
            best_model = {
                "template": best.get("template", ""),
                "method": best.get("method", ""),
                "coverage": best.get("coverage"),
                "from_residue": best.get("from"),
                "to_residue": best.get("to"),
                "created_date": best.get("created_date", ""),
                "coordinates_url": best.get("coordinates", ""),
            }
            qmean = best.get("qmean")
            if isinstance(qmean, dict):
                best_model["qmean_global"] = qmean.get("qmean4_global_score")
            summary["best_model"] = best_model
        else:
            summary["best_model"] = None

        # Methods summary; a missing or null method is counted as "Unknown".
        method_labels = (s.get("method", "Unknown") for s in structures)
        summary["methods_distribution"] = dict(
            Counter("Unknown" if label is None else label for label in method_labels)
        )

        return {
            "status": "success",
            "data": summary,
            "metadata": {
                "source": "SWISS-MODEL Repository",
                "query": uniprot_id,
                "endpoint": "get_model_summary",
            },
        }