# Source code for tooluniverse.ebi_proteins_features_tool

# ebi_proteins_features_tool.py
"""
EBI Proteins API Feature Categories tool for ToolUniverse.

Provides access to specific feature categories from the EBI Proteins API:
- DOMAINS_AND_SITES: binding sites, DNA binding regions, motifs, regions
- MOLECULE_PROCESSING: signal peptides, transit peptides, chains, propeptides
- STRUCTURAL: secondary structure assignments (helix, strand, turn)

API: https://www.ebi.ac.uk/proteins/api/
No authentication required. Free public access.
"""

from collections import Counter
from typing import Dict, Any, Optional
from urllib.parse import quote

import requests

from .base_tool import BaseTool


# Root URL of the EBI Proteins REST API; endpoint paths are appended to it.
PROTEINS_API_BASE_URL = "https://www.ebi.ac.uk/proteins/api"



# [docs]
class EBIProteinsFeaturesTool(BaseTool):
    """
    Tool for retrieving category-specific protein features from EBI Proteins API.

    Different from EBIProteinsExtTool (mutagenesis, PTM) - this covers
    domain/site annotations, molecule processing info, and secondary structure.

    Each instance queries one feature category, read from
    ``tool_config["fields"]["category"]`` (default ``"DOMAINS_AND_SITES"``).
    The request timeout in seconds is read from ``tool_config["timeout"]``
    (default 30).

    ``run`` reports failures as ``{"error": message}`` instead of raising and
    returns ``{"data": ..., "metadata": ...}`` on success.

    No authentication required.
    """


    # [docs]
    def __init__(self, tool_config: Dict[str, Any]):
        """Initialise the tool from its configuration mapping.

        Reads ``timeout`` (seconds, default 30) and ``fields.category``
        (default ``"DOMAINS_AND_SITES"``) from ``tool_config``.
        """
        super().__init__(tool_config)
        # Timeout is forwarded to every HTTP request made by this tool.
        self.timeout = tool_config.get("timeout", 30)
        # The category is fixed per tool instance, not per call.
        field_settings = tool_config.get("fields", {})
        self.category = field_settings.get("category", "DOMAINS_AND_SITES")



    # [docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the EBI Proteins API features call."""
        try:
            return self._get_features(arguments)
        except requests.exceptions.Timeout:
            return {"error": f"EBI Proteins API timed out after {self.timeout}s"}
        except requests.exceptions.ConnectionError:
            return {"error": "Failed to connect to EBI Proteins API"}
        except requests.exceptions.HTTPError as e:
            code = e.response.status_code if e.response is not None else "unknown"
            if code == 404:
                return {"error": f"Protein not found: {arguments.get('accession', '')}"}
            return {"error": f"EBI Proteins API HTTP error: {code}"}
        except Exception as e:
            return {"error": f"Unexpected error querying EBI Proteins API: {str(e)}"}



    # [docs]
    def _get_features(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get features for a specific category."""
        accession = arguments.get("accession", "")
        if not accession:
            return {
                "error": "accession parameter is required (UniProt accession, e.g., P04637)"
            }

        url = f"{PROTEINS_API_BASE_URL}/features/{accession}"
        params = {"categories": self.category}
        headers = {"Accept": "application/json"}
        response = requests.get(
            url, params=params, headers=headers, timeout=self.timeout
        )
        response.raise_for_status()
        data = response.json()

        features = []
        for f in data.get("features", []):
            feature = {
                "type": f.get("type"),
                "position_start": f.get("begin"),
                "position_end": f.get("end"),
                "description": f.get("description"),
            }

            # Include evidences (compact)
            evidences = []
            for ev in f.get("evidences", [])[:3]:
                src = ev.get("source", {})
                evidences.append(
                    {
                        "code": ev.get("code"),
                        "source": src.get("name"),
                        "id": src.get("id"),
                    }
                )
            if evidences:
                feature["evidences"] = evidences

            features.append(feature)

        # Get feature type distribution
        type_counts = {}
        for f in data.get("features", []):
            ft = f.get("type", "UNKNOWN")
            type_counts[ft] = type_counts.get(ft, 0) + 1

        # Determine source label
        category_labels = {
            "DOMAINS_AND_SITES": "Domain/Site Features",
            "MOLECULE_PROCESSING": "Molecule Processing",
            "STRUCTURAL": "Secondary Structure",
        }

        return {
            "data": {
                "accession": data.get("accession"),
                "entry_name": data.get("entryName"),
                "sequence_length": len(data.get("sequence", "")),
                "features": features[:100],
                "total_features": len(data.get("features", [])),
                "feature_type_counts": type_counts,
            },
            "metadata": {
                "source": f"EBI Proteins API - {category_labels.get(self.category, self.category)}",
                "accession": accession,
                "category": self.category,
            },
        }