# Source code for tooluniverse.alphamissense_tool
# alphamissense_tool.py
"""
AlphaMissense API tool for ToolUniverse.
AlphaMissense is DeepMind's deep learning model for predicting the pathogenicity
of missense variants. It provides pathogenicity classifications for ~71 million
possible single amino acid substitutions in the human proteome.
Classifications:
- Pathogenic: score > 0.564
- Ambiguous: 0.34 <= score <= 0.564
- Benign: score < 0.34
API Documentation: https://alphamissense.hegelab.org/
Data Source: Cheng et al., Science 2023
"""
import requests
from typing import Dict, Any, List, Optional
from .base_tool import BaseTool
from .tool_registry import register_tool
# Base URL for the AlphaMissense web service (hosted at hegelab.org).
# Endpoint paths such as "/hotspotapi" and "/pdb/..." are appended to this value.
ALPHAMISSENSE_BASE_URL = "https://alphamissense.hegelab.org"
[docs]
@register_tool("AlphaMissenseTool")
class AlphaMissenseTool(BaseTool):
    """
    Tool for querying AlphaMissense pathogenicity predictions.

    AlphaMissense uses deep learning trained on evolutionary data to predict
    the pathogenicity of all possible single amino acid substitutions in human
    proteins.

    Classification thresholds:
    - Pathogenic: score > 0.564
    - Ambiguous: 0.34 <= score <= 0.564
    - Benign: score < 0.34

    No authentication required. Free for academic/research use.

    Each instance performs a single operation, chosen at construction time
    from ``tool_config["fields"]["operation"]``. Every operation returns a
    plain dict with a ``"status"`` key (``"success"`` or ``"error"``) instead
    of raising, so callers never need to catch exceptions from :meth:`run`.
    """

    # Classification thresholds from the AlphaMissense paper
    PATHOGENIC_THRESHOLD = 0.564
    BENIGN_THRESHOLD = 0.34

    # Residue positions probed, in order, when the first probe (position 1)
    # is answered with HTTP 400 in ``_get_protein_scores``.
    _FALLBACK_POSITIONS = (10, 50, 100)

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Initialise the tool from its configuration dict.

        Args:
            tool_config: Tool configuration. ``"timeout"`` (seconds, default
                30) is used for every HTTP request; ``"fields"["operation"]``
                selects the operation run by :meth:`run` (default
                ``"get_protein_scores"``).
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``"fields"`` may be present but explicitly None; treat that as empty.
        fields = tool_config.get("fields") or {}
        self.operation = fields.get("operation", "get_protein_scores")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the configured AlphaMissense operation.

        Args:
            arguments: Operation arguments (``uniprot_id`` plus ``variant`` or
                ``position`` depending on the operation). ``None`` is treated
                as an empty dict.

        Returns:
            The result dict of the selected operation, or an error dict when
            the configured operation name is not recognised.
        """
        handlers = {
            "get_protein_scores": self._get_protein_scores,
            "get_variant_score": self._get_variant_score,
            "get_residue_scores": self._get_residue_scores,
        }
        handler = handlers.get(self.operation)
        if handler is None:
            return {
                "status": "error",
                "error": f"Unknown operation: {self.operation}",
            }
        return handler(arguments or {})

    def _classify_score(self, score: float) -> str:
        """
        Classify a pathogenicity score using the class thresholds.

        Returns:
            ``"pathogenic"`` when the score is strictly above
            ``PATHOGENIC_THRESHOLD``, ``"benign"`` when strictly below
            ``BENIGN_THRESHOLD``, otherwise ``"ambiguous"`` (both boundary
            values are ambiguous).
        """
        if score > self.PATHOGENIC_THRESHOLD:
            return "pathogenic"
        if score < self.BENIGN_THRESHOLD:
            return "benign"
        return "ambiguous"

    def _thresholds(self) -> Dict[str, str]:
        """Return the human-readable threshold summary attached to results."""
        return {
            "pathogenic": f"> {self.PATHOGENIC_THRESHOLD}",
            "ambiguous": f"{self.BENIGN_THRESHOLD} - {self.PATHOGENIC_THRESHOLD}",
            "benign": f"< {self.BENIGN_THRESHOLD}",
        }

    def _fetch_residue(self, uniprot_id: Any, position: int):
        """
        Issue a GET to the ``/hotspotapi`` endpoint for one residue.

        Sends ``uid`` and ``resi`` as query parameters and applies
        ``self.timeout``. Returns the raw ``requests`` response; status
        handling is left to the caller. Exceptions from ``requests``
        propagate.
        """
        return requests.get(
            f"{ALPHAMISSENSE_BASE_URL}/hotspotapi",
            params={"uid": uniprot_id, "resi": position},
            timeout=self.timeout,
        )

    def _describe_failure(self, exc: Exception) -> Dict[str, Any]:
        """Translate an exception raised during an API call into an error dict."""
        # Timeout is checked first because it is itself a RequestException.
        if isinstance(exc, requests.exceptions.Timeout):
            return {
                "status": "error",
                "error": f"AlphaMissense API timeout after {self.timeout}s",
            }
        if isinstance(exc, requests.exceptions.RequestException):
            return {
                "status": "error",
                "error": f"AlphaMissense API request failed: {str(exc)}",
            }
        return {"status": "error", "error": f"Unexpected error: {str(exc)}"}

    @staticmethod
    def _extract_variant_score(data: Any, alt_aa: str) -> Optional[Any]:
        """
        Look up the score for ``alt_aa`` in a decoded API payload.

        The payload layout is not fixed, so several shapes are tried: a dict
        under ``"scores"`` (or ``"data"``) keyed by amino acid, or a list of
        dicts identified by ``"aa"``/``"variant"`` carrying ``"score"`` or
        ``"am_pathogenicity"``. Returns ``None`` when nothing matches.
        """
        if not isinstance(data, dict):
            return None
        scores = data.get("scores", data.get("data", {}))
        if isinstance(scores, dict):
            return scores.get(alt_aa)
        if isinstance(scores, list):
            for item in scores:
                # Skip malformed entries instead of failing the whole lookup.
                if not isinstance(item, dict):
                    continue
                if item.get("aa") == alt_aa or item.get("variant") == alt_aa:
                    return item.get("score", item.get("am_pathogenicity"))
        return None

    def _get_protein_scores(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get AlphaMissense access information for a protein by UniProt ID.

        Note: The AlphaMissense API requires querying individual residue
        positions. This method only samples one position (position 1, then
        10/50/100 if the API answers HTTP 400) to confirm data is reachable.
        For protein-wide analysis, use get_residue_scores for each position.

        Args:
            arguments: Must contain ``"uniprot_id"``.

        Returns:
            On success, a dict whose ``"data"`` holds the sampled payload, a
            PDB download URL, the endpoint template and the thresholds. HTTP
            404 yields ``status == "success"`` with ``data`` set to ``None``.
            Any other failure yields ``status == "error"``.
        """
        uniprot_id = arguments.get("uniprot_id")
        if not uniprot_id:
            return {"status": "error", "error": "uniprot_id parameter is required"}

        try:
            # Sample query at position 1 to verify the protein exists.
            probed_position = 1
            response = self._fetch_residue(uniprot_id, probed_position)

            if response.status_code == 404:
                return {
                    "status": "success",
                    "data": None,
                    "message": f"No AlphaMissense data found for UniProt ID: {uniprot_id}",
                }

            # Some proteins have no data at position 1; probe a few others.
            if response.status_code == 400:
                for probed_position in self._FALLBACK_POSITIONS:
                    response = self._fetch_residue(uniprot_id, probed_position)
                    if response.status_code == 200:
                        break
                if response.status_code != 200:
                    return {
                        "status": "error",
                        "error": f"AlphaMissense API returned 400 Bad Request. The UniProt ID '{uniprot_id}' may not be available in AlphaMissense database, or the protein sequence may have no annotated residues.",
                    }

            response.raise_for_status()
            data = response.json()

            # The payload is not guaranteed to be a dict; fall back to the
            # position that was actually queried.
            if isinstance(data, dict):
                sample_residue = data.get("resi", probed_position)
            else:
                sample_residue = probed_position

            # Structure file URL for comprehensive data access.
            pdb_url = f"{ALPHAMISSENSE_BASE_URL}/pdb/AF-{uniprot_id}-F1-AM_v4.pdb"
            return {
                "status": "success",
                "data": {
                    "uniprot_id": uniprot_id,
                    "sample_residue": sample_residue,
                    "sample_data": data,
                    "access_info": {
                        "note": "AlphaMissense API requires per-residue queries. Use AlphaMissense_get_residue_scores for specific positions.",
                        "pdb_download": pdb_url,
                        "api_endpoint": f"{ALPHAMISSENSE_BASE_URL}/hotspotapi?uid={uniprot_id}&resi=POSITION",
                    },
                    "thresholds": self._thresholds(),
                },
            }
        except Exception as e:  # boundary: tools report failures as dicts
            return self._describe_failure(e)

    def _get_variant_score(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get the AlphaMissense pathogenicity score for a specific variant.

        Variant format: ``p.X123Y`` or ``X123Y`` where X is the reference
        amino acid, 123 is the position and Y is the variant amino acid
        (single-letter codes, case-insensitive). The whole string must match:
        input with trailing text such as ``R123Hfs`` is rejected rather than
        being scored as ``R123H``.

        Args:
            arguments: Must contain ``"uniprot_id"`` and ``"variant"``.

        Returns:
            On success with a numeric score, a dict with the normalised
            variant, score, classification and thresholds. If no usable score
            is found in the payload, the raw response is returned under
            ``"raw_response"``. HTTP 404 yields ``data`` set to ``None``.
            Invalid input or request failures yield ``status == "error"``.
        """
        # Local import: the module does not import ``re`` at file level.
        import re

        uniprot_id = arguments.get("uniprot_id")
        variant = arguments.get("variant")
        if not uniprot_id:
            return {"status": "error", "error": "uniprot_id parameter is required"}
        if not variant:
            return {
                "status": "error",
                "error": "variant parameter is required (e.g., 'p.R123H' or 'R123H')",
            }

        # Normalise: coerce to text, drop an optional "p." prefix, upper-case.
        variant_clean = str(variant).strip()
        if variant_clean[:2].lower() == "p.":
            variant_clean = variant_clean[2:]
        variant_clean = variant_clean.strip().upper()

        # fullmatch so that trailing characters invalidate the variant.
        match = re.fullmatch(r"([A-Z])(\d+)([A-Z])", variant_clean)
        if not match:
            return {
                "status": "error",
                "error": f"Invalid variant format: {variant}. Expected format: p.X123Y or X123Y (e.g., p.R123H)",
            }
        ref_aa = match.group(1)
        position = int(match.group(2))
        alt_aa = match.group(3)

        try:
            response = self._fetch_residue(uniprot_id, position)
            if response.status_code == 404:
                return {
                    "status": "success",
                    "data": None,
                    "message": f"No AlphaMissense data found for {uniprot_id} position {position}",
                }
            response.raise_for_status()
            data = response.json()

            # Only a numeric score can be classified; anything else is
            # treated as "not found" and the raw payload is returned.
            raw_score = self._extract_variant_score(data, alt_aa)
            try:
                score = float(raw_score) if raw_score is not None else None
            except (TypeError, ValueError):
                score = None

            if score is None:
                return {
                    "status": "success",
                    "data": {
                        "uniprot_id": uniprot_id,
                        "variant": f"p.{ref_aa}{position}{alt_aa}",
                        "raw_response": data,
                        "message": "Score extraction requires parsing API response format",
                    },
                }

            return {
                "status": "success",
                "data": {
                    "uniprot_id": uniprot_id,
                    "variant": f"p.{ref_aa}{position}{alt_aa}",
                    "position": position,
                    "reference_aa": ref_aa,
                    "variant_aa": alt_aa,
                    "pathogenicity_score": score,
                    "classification": self._classify_score(score),
                    "thresholds": self._thresholds(),
                },
            }
        except Exception as e:  # boundary: tools report failures as dicts
            return self._describe_failure(e)

    def _get_residue_scores(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get the AlphaMissense data for one residue position.

        The decoded API payload for the position is returned unmodified under
        ``"scores"``.

        Args:
            arguments: Must contain ``"uniprot_id"`` and ``"position"`` (an
                integer or integer-like string, 1 or greater).

        Returns:
            On success, a dict with the UniProt ID, position, payload and
            thresholds. HTTP 404 yields ``data`` set to ``None``. Invalid
            input or request failures yield ``status == "error"``.
        """
        uniprot_id = arguments.get("uniprot_id")
        position = arguments.get("position")
        if not uniprot_id:
            return {"status": "error", "error": "uniprot_id parameter is required"}
        # Only a truly absent value is "missing"; 0 is handled below.
        if position is None or position == "":
            return {"status": "error", "error": "position parameter is required"}
        try:
            position = int(position)
        except (ValueError, TypeError):
            return {"status": "error", "error": "position must be an integer"}
        if position < 1:
            return {
                "status": "error",
                "error": "position must be a positive integer (residue numbering starts at 1)",
            }

        try:
            response = self._fetch_residue(uniprot_id, position)
            if response.status_code == 404:
                return {
                    "status": "success",
                    "data": None,
                    "message": f"No AlphaMissense data found for {uniprot_id} position {position}",
                }
            response.raise_for_status()
            return {
                "status": "success",
                "data": {
                    "uniprot_id": uniprot_id,
                    "position": position,
                    "scores": response.json(),
                    "thresholds": self._thresholds(),
                },
            }
        except Exception as e:  # boundary: tools report failures as dicts
            return self._describe_failure(e)