# Source code for tooluniverse.pdbepisa_tool
"""
PDBePISA - Protein Interfaces, Surfaces, and Assemblies Tool
Provides access to the PDBePISA CGI API at EBI for analyzing protein
crystal structures to determine biological assemblies, protein-protein
interfaces, and monomer surface areas.
PISA (Proteins, Interfaces, Structures and Assemblies) is used for the
exploration of macromolecular interfaces, prediction of probable biological
assemblies, database searches of structurally similar interfaces and
assemblies, and identification of protein interfaces in crystal packings.
API base: https://www.ebi.ac.uk/pdbe/pisa/cgi-bin/
Returns XML responses which are parsed into structured JSON.
No authentication required.
Reference: Krissinel & Henrick, J Mol Biol 2007 (PMID: 17681537)
"""
import math
import xml.etree.ElementTree as ET
from typing import Any, Dict, List, Optional
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL of the PDBePISA CGI scripts. Requests are built as
# "<PISA_BASE_URL>/<endpoint>?<pdb id>" (see PDBePISATool._fetch_xml).
PISA_BASE_URL = "https://www.ebi.ac.uk/pdbe/pisa/cgi-bin"
def _safe_float(text: Optional[str]) -> Optional[float]:
"""Safely convert text to float, returning None on failure."""
if text is None:
return None
text = text.strip()
if not text:
return None
try:
return float(text)
except (ValueError, TypeError):
return None
def _safe_int(text: Optional[str]) -> Optional[int]:
"""Safely convert text to int, returning None on failure."""
if text is None:
return None
text = text.strip()
if not text:
return None
try:
return int(text)
except (ValueError, TypeError):
return None
def _parse_bond(bond_el: ET.Element) -> Dict[str, Any]:
    """Flatten one ``bond`` element into a plain dictionary.

    Reads the ``chain-N``, ``res-N``, ``seqnum-N`` and ``atname-N`` children
    for both partners (N = 1, 2) plus ``dist``. Missing text children become
    empty strings; missing or unparseable numeric children become None.
    """

    def _text(tag: str) -> str:
        # Absent child and empty child are both reported as "".
        return (bond_el.findtext(tag) or "").strip()

    def _number(tag: str) -> Optional[int]:
        return _safe_int(bond_el.findtext(tag))

    first_partner = {
        "chain_1": _text("chain-1"),
        "residue_1": _text("res-1"),
        "seqnum_1": _number("seqnum-1"),
        "atom_1": _text("atname-1"),
    }
    second_partner = {
        "chain_2": _text("chain-2"),
        "residue_2": _text("res-2"),
        "seqnum_2": _number("seqnum-2"),
        "atom_2": _text("atname-2"),
    }
    return {
        **first_partner,
        **second_partner,
        "distance": _safe_float(bond_el.findtext("dist")),
    }
def _parse_bonds_section(section_el: Optional[ET.Element]) -> Dict[str, Any]:
    """Parse a bonds container element (h-bonds, salt-bridges, etc.).

    Args:
        section_el: The container element, or None when the interface has no
            such section.

    Returns:
        ``{"count": int, "bonds": [...]}``. ``count`` is the value of the
        ``n_bonds`` child when it is present and numeric; otherwise it is the
        number of ``bond`` children actually parsed, so the count never
        contradicts the list when the declared total is missing.
    """
    if section_el is None:
        return {"count": 0, "bonds": []}
    bonds = [_parse_bond(b) for b in section_el.findall("bond")]
    declared = _safe_int(section_el.findtext("n_bonds"))
    count = declared if declared is not None else len(bonds)
    return {"count": count, "bonds": bonds}
def _parse_interface(iface_el: ET.Element) -> Dict[str, Any]:
    """Convert one ``interface`` element into a dictionary.

    The result carries the interface-level scalars, the four bond sections
    (``h-bonds``, ``salt-bridges``, ``ss-bonds``, ``cov-bonds``) and, under
    ``"molecules"``, one record per ``molecule`` child. Missing text values
    become empty strings and missing numeric values become None.
    """

    def _text(node: ET.Element, tag: str) -> str:
        return (node.findtext(tag) or "").strip()

    def _int(node: ET.Element, tag: str) -> Optional[int]:
        return _safe_int(node.findtext(tag))

    def _float(node: ET.Element, tag: str) -> Optional[float]:
        return _safe_float(node.findtext(tag))

    # Per-molecule records for the molecules taking part in this interface.
    participants = [
        {
            "id": _int(mol, "id"),
            "chain_id": _text(mol, "chain_id"),
            "mol_class": _text(mol, "class"),
            "symop": _text(mol, "symop"),
            "int_nres": _int(mol, "int_nres"),
            "int_area": _float(mol, "int_area"),
            "int_solv_en": _float(mol, "int_solv_en"),
            "pvalue": _float(mol, "pvalue"),
        }
        for mol in iface_el.findall("molecule")
    ]

    return {
        "id": _int(iface_el, "id"),
        "type": _int(iface_el, "type"),
        "n_occurrences": _int(iface_el, "n_occ"),
        "interface_area": _float(iface_el, "int_area"),
        "solvation_energy": _float(iface_el, "int_solv_en"),
        "p_value": _float(iface_el, "pvalue"),
        "stabilization_energy": _float(iface_el, "stab_en"),
        "css_score": _float(iface_el, "css"),
        "overlap": _text(iface_el, "overlap"),
        "x_rel": _text(iface_el, "x-rel"),
        "fixed": _text(iface_el, "fixed"),
        "h_bonds": _parse_bonds_section(iface_el.find("h-bonds")),
        "salt_bridges": _parse_bonds_section(iface_el.find("salt-bridges")),
        "ss_bonds": _parse_bonds_section(iface_el.find("ss-bonds")),
        "cov_bonds": _parse_bonds_section(iface_el.find("cov-bonds")),
        "molecules": participants,
    }
def _parse_assembly(asm_el: ET.Element) -> Dict[str, Any]:
    """Convert one ``assembly`` element into a dictionary.

    Each output key is read from a single child tag and passed through the
    converter listed next to it. Missing text values become empty strings and
    missing or unparseable numeric values become None.
    """

    def _text(raw: Optional[str]) -> str:
        return (raw or "").strip()

    # (output key, XML child tag, converter) - order defines the key order.
    field_table = (
        ("id", "id", _safe_int),
        ("size", "size", _safe_int),
        ("macromolecular_size", "mmsize", _safe_int),
        ("stability_score", "score", _text),
        ("dissociation_energy", "diss_energy", _safe_float),
        ("accessible_surface_area", "asa", _safe_float),
        ("buried_surface_area", "bsa", _safe_float),
        ("entropy", "entropy", _safe_float),
        ("dissociation_area", "diss_area", _safe_float),
        ("interaction_energy", "int_energy", _safe_float),
        ("n_unit_cells", "n_uc", _safe_int),
        ("n_dissociable", "n_diss", _safe_int),
        ("symmetry_number", "symNumber", _safe_int),
        ("R350", "R350", _safe_int),
        ("formula", "formula", _text),
        ("composition", "composition", _text),
    )
    return {
        key: convert(asm_el.findtext(tag)) for key, tag, convert in field_table
    }
[docs]
@register_tool("PDBePISATool")
class PDBePISATool(BaseTool):
    """
    Tool for querying PDBePISA (Protein Interfaces, Surfaces and Assemblies).

    PISA analyzes crystal structures to identify biological assemblies,
    protein-protein interfaces, and surface properties.

    Supported operations:
    - get_interfaces: Get interface analysis for a PDB entry
    - get_assemblies: Get biological assembly predictions
    - get_monomer_analysis: Get monomer surface area analysis

    Every operation returns a dict with ``"status"`` set to ``"success"``
    (payload under ``"data"``) or ``"error"`` (message under ``"error"``).
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """Initialise the base tool and create a reusable HTTP session."""
        super().__init__(tool_config)
        self.session = requests.Session()
        # Passed as the ``timeout`` argument of every session.get() call.
        self.timeout = 30

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the PDBePISA API tool with given arguments.

        Args:
            arguments: Call arguments; ``pdb_id`` is read by every operation
                and ``operation`` is used only as a fallback (see below).

        Returns:
            The handler's result dict, or an error dict when the operation is
            missing/unknown or the handler raises.
        """
        # Tolerate a missing arguments dict instead of failing on .get().
        arguments = arguments or {}
        # Feature-68B-006: operation is now an internal config value, not a user parameter.
        # Fall back to arguments for backward compatibility (also when the
        # configured value or the "fields" mapping itself is empty/None).
        fields = self.tool_config.get("fields") or {}
        operation = fields.get("operation") or arguments.get("operation")
        if not operation:
            return {"status": "error", "error": "Missing required parameter: operation"}

        operation_handlers = {
            "get_interfaces": self._get_interfaces,
            "get_assemblies": self._get_assemblies,
            "get_monomer_analysis": self._get_monomer_analysis,
        }
        handler = operation_handlers.get(operation)
        if not handler:
            return {
                "status": "error",
                "error": f"Unknown operation: {operation}",
                "available_operations": list(operation_handlers.keys()),
            }

        try:
            return handler(arguments)
        except requests.exceptions.Timeout:
            return {"status": "error", "error": "PDBePISA API request timed out"}
        except requests.exceptions.ConnectionError:
            return {"status": "error", "error": "Failed to connect to PDBePISA API"}
        except ET.ParseError as e:
            return {
                "status": "error",
                "error": f"Failed to parse XML response: {str(e)}",
            }
        except Exception as e:
            # Top-level boundary: report any other failure as an error result.
            return {"status": "error", "error": f"PDBePISA operation failed: {str(e)}"}

    @staticmethod
    def _normalise_pdb_id(pdb_id: Any) -> str:
        """Return ``pdb_id`` as a stripped, lower-cased string ("" if falsy)."""
        if not pdb_id:
            return ""
        return str(pdb_id).strip().lower()

    def _fetch_xml(self, endpoint: str, pdb_id: str) -> Dict[str, Any]:
        """Fetch and parse XML from a PISA CGI endpoint.

        Args:
            endpoint: CGI script name appended to ``PISA_BASE_URL``.
            pdb_id: PDB identifier sent as the raw query string.

        Returns:
            ``{"ok": True, "entry": <pdb_entry element>}`` on success, or
            ``{"ok": False, "error": ...}`` when the id is empty, the HTTP
            status is not 200, the body is not well-formed XML, or either
            ``status`` element is not ``"Ok"``.

        Raises:
            requests.exceptions.RequestException: propagated from the HTTP
                call; ``run`` converts these into error results.
        """
        pdb_code = self._normalise_pdb_id(pdb_id)
        if not pdb_code:
            return {"ok": False, "error": "pdb_id parameter is required"}

        # Percent-encode so stray characters cannot corrupt the query string;
        # "," is kept literal so a comma-separated id list passes unchanged.
        url = f"{PISA_BASE_URL}/{endpoint}?{quote(pdb_code, safe=',')}"
        response = self.session.get(url, timeout=self.timeout)
        if response.status_code != 200:
            return {
                "ok": False,
                "error": f"PDBePISA returned status {response.status_code}",
                "detail": response.text[:500],
            }

        try:
            # Parse the raw bytes so the XML parser decides the encoding
            # rather than relying on the HTTP-header-based guess behind .text.
            root = ET.fromstring(response.content)
        except ET.ParseError as e:
            return {"ok": False, "error": f"Failed to parse XML: {str(e)}"}

        # Check PISA status
        status = (root.findtext("status") or "").strip()
        if status != "Ok":
            return {"ok": False, "error": f"PDBePISA status: {status}"}

        entry = root.find("pdb_entry")
        if entry is None:
            return {"ok": False, "error": "No pdb_entry element in response"}

        entry_status = (entry.findtext("status") or "").strip()
        if entry_status != "Ok":
            return {"ok": False, "error": f"PDB entry status: {entry_status}"}
        return {"ok": True, "entry": entry}

    def _load_entry(self, endpoint: str, arguments: Dict[str, Any]):
        """Validate ``pdb_id`` and fetch its ``pdb_entry`` element.

        Returns:
            ``(entry, None)`` on success or ``(None, error_result)`` where
            ``error_result`` is the dict the operation should return.
        """
        pdb_id = self._normalise_pdb_id(arguments.get("pdb_id"))
        if not pdb_id:
            return None, {"status": "error", "error": "pdb_id parameter is required"}
        result = self._fetch_xml(endpoint, pdb_id)
        if not result["ok"]:
            return None, {"status": "error", "error": result["error"]}
        return result["entry"], None

    def _get_interfaces(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get interface analysis for a PDB entry.

        Queries ``interfaces.pisa`` and returns the entry's ``pdb_code``,
        ``n_interfaces`` and one parsed record per ``interface`` element.
        """
        entry, error = self._load_entry("interfaces.pisa", arguments)
        if error is not None:
            return error

        return {
            "status": "success",
            "data": {
                "pdb_code": (entry.findtext("pdb_code") or "").strip(),
                "n_interfaces": _safe_int(entry.findtext("n_interfaces")) or 0,
                "interfaces": [
                    _parse_interface(iface) for iface in entry.findall("interface")
                ],
            },
        }

    def _get_assemblies(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get biological assembly predictions for a PDB entry.

        Queries ``multimers.pisa`` and returns the entry's ``pdb_code``,
        ``total_asm`` and, per ``asm_set`` element, its ``ser_no`` and parsed
        ``assembly`` children.
        """
        entry, error = self._load_entry("multimers.pisa", arguments)
        if error is not None:
            return error

        assembly_sets = [
            {
                "set_number": _safe_int(asm_set.findtext("ser_no")),
                "assemblies": [
                    _parse_assembly(asm) for asm in asm_set.findall("assembly")
                ],
            }
            for asm_set in entry.findall("asm_set")
        ]
        return {
            "status": "success",
            "data": {
                "pdb_code": (entry.findtext("pdb_code") or "").strip(),
                "total_assemblies": _safe_int(entry.findtext("total_asm")) or 0,
                "assembly_sets": assembly_sets,
            },
        }

    def _get_monomer_analysis(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get monomer (individual chain) surface area analysis.

        Uses the interfaces endpoint and groups the per-molecule data of
        every interface by ``chain_id``: each chain record accumulates the
        summed interface area, the number of interface participations and
        the per-interface details.
        """
        # Use interfaces endpoint to extract molecule-level data
        entry, error = self._load_entry("interfaces.pisa", arguments)
        if error is not None:
            return error

        # Collect unique chains and their interface participations
        # (dict order = order in which chains are first seen).
        chain_data: Dict[str, Dict[str, Any]] = {}
        for iface in entry.findall("interface"):
            iface_id = _safe_int(iface.findtext("id"))
            for mol in iface.findall("molecule"):
                chain_id = (mol.findtext("chain_id") or "").strip()
                int_area = _safe_float(mol.findtext("int_area"))

                # mol_class is taken from the first molecule seen for a chain.
                record = chain_data.setdefault(
                    chain_id,
                    {
                        "chain_id": chain_id,
                        "mol_class": (mol.findtext("class") or "").strip(),
                        "total_interface_area": 0.0,
                        "interface_count": 0,
                        "interface_participations": [],
                    },
                )
                # An unparseable area still counts as a participation but
                # adds nothing to the total.
                if int_area is not None:
                    record["total_interface_area"] += int_area
                record["interface_count"] += 1
                record["interface_participations"].append(
                    {
                        "interface_id": iface_id,
                        "interface_area": int_area,
                        "solvation_energy": _safe_float(mol.findtext("int_solv_en")),
                        "interface_residues": _safe_int(mol.findtext("int_nres")),
                    }
                )

        return {
            "status": "success",
            "data": {
                "pdb_code": (entry.findtext("pdb_code") or "").strip(),
                "n_interfaces": _safe_int(entry.findtext("n_interfaces")) or 0,
                "chains": list(chain_data.values()),
            },
        }