# Source code for tooluniverse.gpcrdb_tool
"""
GPCRdb API tool for ToolUniverse.
GPCRdb is a comprehensive database for G protein-coupled receptors (GPCRs),
which are the targets of ~35% of all approved drugs.
API Documentation: https://docs.gpcrdb.org/web_services.html
No authentication required.
"""
import html
import re
from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Base URL for GPCRdb API; every endpoint path used in this module is appended to it.
GPCRDB_API_URL = "https://gpcrdb.org/services"
# Matches any "<...>" tag so HTML markup can be removed from name fields.
_HTML_TAG_RE = re.compile(r"<[^>]+>")
# [docs]
@register_tool("GPCRdbTool")
class GPCRdbTool(BaseTool):
    """
    Tool for querying GPCRdb GPCR database.

    GPCRdb provides:
    - GPCR protein information and classification
    - Structure data for GPCR crystal/cryo-EM structures
    - Ligand binding data
    - Mutation data and effects
    - Sequence alignments

    Supported operations (selected through ``arguments["operation"]``):
    ``get_protein``, ``list_proteins``, ``get_structures``, ``get_ligands``
    and ``get_mutations``. Every operation returns a dict whose ``status`` is
    either ``"success"`` (with ``data`` and ``metadata``) or ``"error"``
    (with an ``error`` message); request failures are reported through that
    dict rather than raised.

    No authentication required. Free public access.
    """

    # Headers sent with every GPCRdb request.
    _REQUEST_HEADERS = {
        "Accept": "application/json",
        "User-Agent": "ToolUniverse/GPCRdb",
    }

    # Map human-readable class/family names to GPCRdb numeric slugs (Feature-122A-001)
    _FAMILY_NAME_TO_SLUG = {
        "class a": "001",
        "class a (rhodopsin)": "001",
        "rhodopsin": "001",
        "class b": "002",
        "class b1": "002",
        "secretin": "002",
        "class b2": "003",
        "adhesion": "003",
        "class c": "004",
        "glutamate": "004",
        "class f": "005",
        "frizzled": "005",
        "class t": "006",
        "taste2": "006",
        "aminergic": "001_001",
        "aminergic receptors": "001_001",
        "peptide receptors": "001_003",
        "chemokine receptors": "001_003_002",
        "chemokine": "001_003_002",
        "purine receptors": "001_004",
        "lipid receptors": "001_007",
        "serotonin": "001_001_001",
        "5-hydroxytryptamine": "001_001_001",
        "dopamine": "001_001_004",
        "adrenoceptor": "001_001_003",
        "adrenergic": "001_001_003",
        "adrenergic receptors": "001_001_003",
        "muscarinic": "001_001_002",
        "histamine": "001_001_005",
        "beta-adrenergic": "001_001_003_008",
        "opioid": "001_003_015",
        "endothelin": "001_003_006",
    }

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Initialise the tool from its configuration.

        Args:
            tool_config: Tool configuration dict. ``timeout`` (passed to
                ``requests.get``, default 30) and ``parameter`` (default
                ``{}``) are read from it.
        """
        super().__init__(tool_config)
        self.timeout: int = tool_config.get("timeout", 30)
        self.parameter = tool_config.get("parameter", {})

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute GPCRdb API call based on operation type.

        Args:
            arguments: Call arguments. ``operation`` selects the handler; when
                it is missing or empty the value returned by
                ``get_schema_const_operation()`` is used. ``protein_id``,
                ``receptor_name`` and ``protein_name`` are accepted as aliases
                for ``protein`` (first non-empty one wins).

        Returns:
            The handler's result dict, or an error dict for an unknown
            operation.
        """
        # Work on a shallow copy so the caller's dict is never mutated.
        arguments = dict(arguments or {})

        # Normalize aliases -> protein
        if not arguments.get("protein"):
            alias = (
                arguments.get("protein_id")
                or arguments.get("receptor_name")
                or arguments.get("protein_name")
            )
            if alias:
                arguments["protein"] = alias

        operation = arguments.get("operation", "")
        # Auto-fill operation from tool config const if not provided by user
        if not operation:
            operation = self.get_schema_const_operation()

        handlers = {
            "get_protein": self._get_protein,
            "list_proteins": self._list_proteins,
            "get_structures": self._get_structures,
            "get_ligands": self._get_ligands,
            "get_mutations": self._get_mutations,
        }
        # Non-string (possibly unhashable) operations can never match a handler.
        handler = handlers.get(operation) if isinstance(operation, str) else None
        if handler is None:
            return {
                "status": "error",
                "error": f"Unknown operation: {operation}. Supported: get_protein, list_proteins, get_structures, get_ligands, get_mutations",
            }
        return handler(arguments)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _error(message: str) -> Dict[str, Any]:
        """Build the standard error result dict."""
        return {"status": "error", "error": message}

    @staticmethod
    def _http_status(error: Exception) -> Any:
        """Return the HTTP status code attached to *error*, or None if absent."""
        response = getattr(error, "response", None)
        return getattr(response, "status_code", None)

    @staticmethod
    def _clean_text(value: Any) -> Any:
        """
        Decode HTML entities and strip HTML tags from a string.

        Non-string values are returned unchanged so that a missing or null
        field never raises.
        """
        if not isinstance(value, str):
            return value
        return _HTML_TAG_RE.sub("", html.unescape(value))

    def _fetch_json(self, *segments: Any) -> Any:
        """
        GET ``{GPCRDB_API_URL}/<segments joined by '/'>/`` and decode the JSON body.

        Each segment is percent-encoded so a user-supplied identifier cannot
        add or remove path components.

        Raises:
            requests.exceptions.RequestException: on connection problems or a
                non-2xx response (via ``raise_for_status``).
            ValueError: if the body is not valid JSON.
        """
        path = "/".join(quote(str(segment), safe="") for segment in segments)
        response = requests.get(
            f"{GPCRDB_API_URL}/{path}/",
            timeout=self.timeout,
            headers=self._REQUEST_HEADERS,
        )
        response.raise_for_status()
        return response.json()

    def _normalize_protein(self, protein: str) -> str:
        """
        Resolve gene symbol (e.g. ADRB2) to GPCRdb entry name (e.g. adrb2_human).

        A value without an underscore is lower-cased and suffixed with
        ``_human``; a value that already contains an underscore is returned
        as given (apart from surrounding whitespace). Empty input yields "".
        """
        if not protein:
            return ""
        protein = str(protein).strip()
        if protein and "_" not in protein:
            return f"{protein.lower()}_human"
        return protein

    # ------------------------------------------------------------------
    # get_protein
    # ------------------------------------------------------------------

    def _protein_result(self, data: Any, metadata: Dict[str, Any]) -> Dict[str, Any]:
        """Wrap a protein payload in a success result, cleaning its ``name`` field."""
        # Strip HTML tags/entities from name field (e.g. "β<sub>2</sub>-adrenoceptor")
        if isinstance(data, dict) and "name" in data:
            data["name"] = self._clean_text(data["name"])
        return {"status": "success", "data": data, "metadata": metadata}

    def _get_protein_fallback(self, protein: str) -> Dict[str, Any]:
        """
        Resolve *protein* after the entry-name endpoint answered 404.

        Tries, in order, the accession endpoint (for UniProt IDs like P07550)
        and, when *protein* has no underscore, the ``{lowercase}_human`` entry
        name (e.g. CCR5 -> ccr5_human). A failed attempt is skipped; if none
        succeeds a "not found" error is returned.
        """
        attempts = [
            (
                ("protein", "accession", protein),
                {"source": "GPCRdb", "protein": protein},
            )
        ]
        if "_" not in protein:
            human_entry = f"{protein.lower()}_human"
            attempts.append(
                (
                    ("protein", human_entry),
                    {
                        "source": "GPCRdb",
                        "protein": human_entry,
                        "resolved_from": protein,
                    },
                )
            )

        for segments, metadata in attempts:
            try:
                data = self._fetch_json(*segments)
            except (requests.exceptions.RequestException, ValueError):
                # Best effort: this candidate did not resolve, try the next one.
                continue
            return self._protein_result(data, metadata)

        return self._error(
            f"Protein not found: {protein}. Use GPCRdb entry name (e.g. adrb2_human) or UniProt accession (e.g. P07550)."
        )

    def _get_protein(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get detailed protein information for a GPCR.

        Args:
            arguments: Dict containing:
                - protein: Protein entry name (e.g., adrb2_human) or UniProt accession

        Returns:
            Success dict with the protein record, or an error dict. On a 404
            from the entry-name endpoint the accession endpoint and the
            ``{symbol}_human`` entry name are tried before giving up.
        """
        protein = str(arguments.get("protein") or "").strip()
        if not protein:
            return self._error("Missing required parameter: protein")

        try:
            data = self._fetch_json("protein", protein)
        except requests.exceptions.HTTPError as e:
            status = self._http_status(e)
            if status == 404:
                return self._get_protein_fallback(protein)
            return self._error(f"HTTP error: {status}")
        except requests.exceptions.RequestException as e:
            return self._error(f"Request failed: {str(e)}")
        except Exception as e:
            return self._error(f"Unexpected error: {str(e)}")

        return self._protein_result(data, {"source": "GPCRdb", "protein": protein})

    # ------------------------------------------------------------------
    # list_proteins
    # ------------------------------------------------------------------

    def _list_proteins(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        List GPCR protein families from GPCRdb.

        Args:
            arguments: Dict containing:
                - family: GPCR family slug (e.g., '001') or human-readable name
                  (e.g., 'Chemokine receptors'). If provided, returns proteins in
                  that family.
                - protein_class: Alias for family; accepts human-readable names.

        Note: GPCRdb API does not support listing all proteins by species alone.
        Without family, returns list of protein families.

        Returns:
            Success dict whose ``data`` holds ``proteins``, ``count``,
            ``family`` and, where relevant, a ``note``; or an error dict.
        """
        requested = arguments.get("family") or arguments.get("protein_class", "")
        # Coerce to str so a numeric slug passed as an int cannot crash below.
        family = str(requested).strip() if requested else ""

        # Resolve human-readable class names to numeric slugs (Feature-122A-001)
        if family and not family.replace("_", "").isdigit():
            family = self._FAMILY_NAME_TO_SLUG.get(family.lower(), family)

        try:
            if family:
                # List proteins in specific family
                data = self._fetch_json("proteinfamily", "proteins", family)
            else:
                # List protein families (no endpoint for all proteins by species)
                data = self._fetch_json("proteinfamily")

            proteins = data if isinstance(data, list) else [data]

            # Strip HTML entities and tags from name fields (Feature-123B-002)
            for item in proteins:
                if isinstance(item, dict) and isinstance(item.get("name"), str):
                    item["name"] = self._clean_text(item["name"])

            note = None
            if family and len(proteins) == 0:
                note = (
                    f"No proteins found for family slug '{family}'. "
                    "The 'family' parameter requires a numeric GPCRdb slug (e.g., '001_003_002' "
                    "for Chemokine receptors). Use protein_class with a human-readable name "
                    "(e.g., 'Chemokine receptors') or call without 'family' to discover slugs."
                )
            elif not family:
                note = (
                    "To list proteins in a specific family, pass its numeric slug as 'family' "
                    "(e.g., '001_003_002') or use protein_class with a human-readable name "
                    "(e.g., 'Chemokine receptors'). Call without arguments to discover all slugs."
                )

            result: Dict[str, Any] = {
                "proteins": proteins,
                "count": len(proteins),
                "family": family if family else "all families",
            }
            if note:
                result["note"] = note

            return {
                "status": "success",
                "data": result,
                "metadata": {
                    "source": "GPCRdb",
                },
            }
        except requests.exceptions.HTTPError as e:
            status = self._http_status(e)
            if status == 404:
                return self._error(
                    f"Protein family not found: {family}. Use a numeric GPCRdb slug "
                    "(e.g. '001_003_002') or a supported family name "
                    "(e.g. 'Chemokine receptors'); call without 'family' to list all slugs."
                )
            return self._error(f"HTTP error: {status}")
        except requests.exceptions.RequestException as e:
            return self._error(f"Request failed: {str(e)}")
        except Exception as e:
            return self._error(f"Unexpected error: {str(e)}")

    # ------------------------------------------------------------------
    # get_structures
    # ------------------------------------------------------------------

    @staticmethod
    def _resolution_within(structure: Any, max_res: float) -> bool:
        """
        Return True if *structure* has a numeric ``resolution`` <= *max_res*.

        Records that are not dicts, or whose resolution is missing or not
        parseable as a float, are treated as not matching.
        """
        if not isinstance(structure, dict):
            return False
        value = structure.get("resolution")
        if value is None:
            return False
        try:
            return float(value) <= max_res
        except (TypeError, ValueError):
            return False

    def _get_structures(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get GPCR structure information.

        Args:
            arguments: Dict containing:
                - protein: Protein entry name (e.g., adrb2_human) or gene symbol
                  (e.g., ADRB2) - optional; without it all structures are requested
                - state: Receptor state filter (active, inactive, intermediate),
                  compared case-insensitively
                - resolution: Maximum resolution; records above it, or without a
                  numeric resolution, are dropped. An unparseable threshold is
                  ignored.

        Returns:
            Success dict whose ``data`` holds ``structures``, ``count``,
            ``protein``, ``state_filter`` and ``resolution_filter``; a 404 is
            reported as a successful empty result. Otherwise an error dict.
        """
        protein = self._normalize_protein(arguments.get("protein", ""))
        state = str(arguments.get("state") or "").strip()

        # Parse the threshold once, up front; a bad value disables the filter.
        max_res = None
        resolution = arguments.get("resolution")
        if resolution is not None:
            try:
                max_res = float(resolution)
            except (TypeError, ValueError):
                max_res = None

        try:
            if protein:
                data = self._fetch_json("structure", "protein", protein)
            else:
                data = self._fetch_json("structure")

            structures = data if isinstance(data, list) else [data]

            # Filter by state if specified
            if state:
                wanted = state.lower()
                structures = [
                    s
                    for s in structures
                    if isinstance(s, dict)
                    and str(s.get("state") or "").lower() == wanted
                ]

            # Filter by max resolution (client-side, GPCRdb API has no resolution param)
            if max_res is not None:
                structures = [
                    s for s in structures if self._resolution_within(s, max_res)
                ]

            return {
                "status": "success",
                "data": {
                    "structures": structures,
                    "count": len(structures),
                    "protein": protein if protein else "all",
                    "state_filter": state if state else "all",
                    "resolution_filter": max_res if max_res is not None else "all",
                },
                "metadata": {
                    "source": "GPCRdb",
                },
            }
        except requests.exceptions.HTTPError as e:
            status = self._http_status(e)
            if status == 404:
                return {
                    "status": "success",
                    "data": {
                        "structures": [],
                        "count": 0,
                        "protein": protein if protein else "all",
                        "state_filter": state if state else "all",
                    },
                    "metadata": {"note": "No structures found"},
                }
            return self._error(f"HTTP error: {status}")
        except requests.exceptions.RequestException as e:
            return self._error(f"Request failed: {str(e)}")
        except Exception as e:
            return self._error(f"Unexpected error: {str(e)}")

    # ------------------------------------------------------------------
    # get_ligands
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_limit(arguments: Dict[str, Any]) -> Any:
        """
        Read ``limit`` (or its alias ``max_results``) as a positive int.

        Returns None, meaning "no limit", when neither is given or the value
        is zero, negative or not an integer. Rejecting negatives matters: a
        negative slice bound would silently drop records from the end.
        """
        raw = arguments.get("limit") or arguments.get("max_results")
        if raw is None:
            return None
        try:
            limit = int(raw)
        except (TypeError, ValueError):
            return None
        return limit if limit > 0 else None

    def _get_ligands(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get ligands associated with a GPCR.

        Args:
            arguments: Dict containing:
                - protein: Protein entry name (e.g., adrb2_human) or gene symbol (e.g., ADRB2)
                - type / ligand_type: Optional case-insensitive substring matched
                  against each record's "Ligand type" (or "type") field
                - limit / max_results: Optional positive maximum number of
                  records to return

        Returns:
            Success dict whose ``data`` holds ``protein``, ``ligands``,
            ``count`` (returned) and ``total_count`` (before the limit), plus a
            ``note`` when the list was truncated; a 404 is reported as a
            successful empty result. Otherwise an error dict.
        """
        protein = self._normalize_protein(arguments.get("protein", ""))
        if not protein:
            return self._error("Missing required parameter: protein")

        try:
            data = self._fetch_json("ligands", protein)
            if isinstance(data, list):
                ligands = data
            elif isinstance(data, dict):
                ligands = data.get("ligands", [])
            else:
                ligands = []

            # Filter by ligand type if specified.
            # GPCRdb API returns "Ligand type" field (e.g., "small molecule", "peptide")
            ligand_type = (
                str(arguments.get("type") or arguments.get("ligand_type") or "")
                .strip()
                .lower()
            )
            if ligand_type:
                ligands = [
                    lig
                    for lig in ligands
                    if isinstance(lig, dict)
                    and ligand_type
                    in str(lig.get("Ligand type") or lig.get("type") or "").lower()
                ]

            total_count = len(ligands)

            # Apply limit/max_results client-side (Feature-122A-003)
            limit = self._parse_limit(arguments)
            if limit is not None:
                ligands = ligands[:limit]

            # Sanitize HTML entities and fix nan DOIs in each ligand record
            for lig in ligands:
                if not isinstance(lig, dict):
                    continue
                for field in ("Ligand name", "Protein name"):
                    if isinstance(lig.get(field), str):
                        lig[field] = self._clean_text(lig[field])
                doi = lig.get("DOI", "")
                if isinstance(doi, str) and doi.lower().endswith("/nan"):
                    lig["DOI"] = None

            result: Dict[str, Any] = {
                "protein": protein,
                "ligands": ligands,
                "count": len(ligands),
                "total_count": total_count,
            }
            if total_count > len(ligands):
                result["note"] = (
                    f"Showing {len(ligands)} of {total_count} ligands. Increase limit to retrieve more."
                )

            return {
                "status": "success",
                "data": result,
                "metadata": {
                    "source": "GPCRdb",
                    "protein": protein,
                },
            }
        except requests.exceptions.HTTPError as e:
            status = self._http_status(e)
            if status == 404:
                return {
                    "status": "success",
                    "data": {
                        "protein": protein,
                        "ligands": [],
                        "count": 0,
                        "total_count": 0,
                    },
                    "metadata": {"note": "No ligands found for this protein"},
                }
            return self._error(f"HTTP error: {status}")
        except requests.exceptions.RequestException as e:
            return self._error(f"Request failed: {str(e)}")
        except Exception as e:
            return self._error(f"Unexpected error: {str(e)}")

    # ------------------------------------------------------------------
    # get_mutations
    # ------------------------------------------------------------------

    def _get_mutations(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get mutation data for a GPCR.

        Args:
            arguments: Dict containing:
                - protein: Protein entry name (e.g., adrb2_human) or gene symbol (e.g., ADRB2)

        Returns:
            Success dict whose ``data`` holds ``protein``, ``mutations`` and
            ``count``, plus a ``note`` pointing at the GPCRdb website when the
            list is empty; a 404 is reported as a successful empty result.
            Otherwise an error dict.
        """
        protein = self._normalize_protein(arguments.get("protein", ""))
        if not protein:
            return self._error("Missing required parameter: protein")

        try:
            data = self._fetch_json("mutants", "protein", protein)
            if isinstance(data, list):
                mutations = data
            elif isinstance(data, dict):
                mutations = data.get("mutations", [])
            else:
                mutations = []

            result: Dict[str, Any] = {
                "protein": protein,
                "mutations": mutations,
                "count": len(mutations),
            }
            if len(mutations) == 0:
                result["note"] = (
                    "The GPCRdb mutations API (/services/mutants/) currently returns empty results for all receptors. For mutation data, visit https://gpcrdb.org/mutations/."
                )

            return {
                "status": "success",
                "data": result,
                "metadata": {
                    "source": "GPCRdb",
                    "protein": protein,
                },
            }
        except requests.exceptions.HTTPError as e:
            status = self._http_status(e)
            if status == 404:
                return {
                    "status": "success",
                    "data": {"protein": protein, "mutations": [], "count": 0},
                    "metadata": {"note": "No mutation data found"},
                }
            return self._error(f"HTTP error: {status}")
        except requests.exceptions.RequestException as e:
            return self._error(f"Request failed: {str(e)}")
        except Exception as e:
            return self._error(f"Unexpected error: {str(e)}")