Source code for tooluniverse.deepgo_tool

"""
DeepGO Tool - Predict protein function using Gene Ontology terms.

DeepGO uses deep learning (DeepGOPlus method) to predict protein functions
from amino acid sequences. Returns GO term predictions across Biological Process,
Molecular Function, and Cellular Component ontologies with confidence scores.
"""

from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
import requests


@register_tool("DeepGOTool")
class DeepGOTool(BaseTool):
    """Tool for predicting protein function using DeepGO.

    The tool validates a protein sequence, submits it as a FASTA record to
    the DeepGO web API and reshapes the returned predictions into a plain
    dictionary. Every outcome, including validation and network failures,
    is reported as a dictionary with a ``"status"`` key rather than an
    exception.
    """

    BASE_URL = "https://deepgo.cbrc.kaust.edu.sa/deepgo"
    API_VERSION = "1.0.26"

    # Seconds to wait for the prediction endpoint before giving up.
    REQUEST_TIMEOUT = 120
    # Threshold used when the caller does not supply one.
    DEFAULT_THRESHOLD = 0.3
    # Letters accepted in a sequence; anything else is rejected before the
    # request is sent.
    _VALID_AA = frozenset("ACDEFGHIKLMNPQRSTVWY")

    def __init__(self, tool_config):
        """Store the parameter schema and the configured operation.

        Args:
            tool_config: Tool configuration mapping. ``parameter`` and
                ``fields.operation`` are read from it; the operation
                defaults to ``"predict_function"``.
        """
        super().__init__(tool_config)
        self.parameter = tool_config.get("parameter", {})
        self.required = self.parameter.get("required", [])
        self.operation = tool_config.get("fields", {}).get(
            "operation", "predict_function"
        )

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to appropriate operation handler.

        Args:
            arguments: Call arguments forwarded to the operation handler.

        Returns:
            The handler's result dictionary, or an error dictionary when the
            configured operation is not recognised.
        """
        operation = self.operation
        if operation == "predict_function":
            return self._predict_function(arguments)
        return {"status": "error", "error": f"Unknown operation: {operation}"}

    def _predict_function(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Predict protein function from sequence.

        Args:
            arguments: Mapping with the keys ``sequence`` (required string of
                amino acid letters; whitespace and case are ignored),
                ``threshold`` (optional number or numeric string between 0.1
                and 1.0, default 0.3) and ``name`` (optional label used in
                the FASTA header, default ``"query"``).

        Returns:
            ``{"status": "success", "data": ..., "uuid": ...}`` when the
            service returns predictions, a ``"success"`` dictionary with an
            empty prediction list and a ``"message"`` when it returns none,
            or ``{"status": "error", "error": ...}`` for invalid input or a
            failed request.
        """
        sequence = arguments.get("sequence")
        if not sequence:
            return {"status": "error", "error": "Missing required parameter: sequence"}
        # A non-string value (e.g. a list) has no .split(); report it as a
        # validation error instead of letting AttributeError escape.
        if not isinstance(sequence, str):
            return {
                "status": "error",
                "error": "Invalid sequence: must be a string of amino acid letters",
            }

        # Clean sequence - remove whitespace and convert to uppercase
        sequence = "".join(sequence.split()).upper()

        # Validate sequence
        if not all(aa in self._VALID_AA for aa in sequence):
            return {
                "status": "error",
                "error": "Invalid sequence: contains non-amino acid characters",
            }
        if len(sequence) < 10:
            return {
                "status": "error",
                "error": "Sequence too short: minimum 10 amino acids required",
            }
        if len(sequence) > 5000:
            return {
                "status": "error",
                "error": "Sequence too long: maximum 5000 amino acids supported",
            }

        # Get threshold for predictions. An explicit None is treated like a
        # missing value, and numeric strings are accepted, so the range
        # comparison below can never raise TypeError.
        threshold = arguments.get("threshold")
        if threshold is None:
            threshold = self.DEFAULT_THRESHOLD
        if isinstance(threshold, str):
            try:
                threshold = float(threshold)
            except ValueError:
                return {
                    "status": "error",
                    "error": "Threshold must be a number between 0.1 and 1.0",
                }
        # bool is a subclass of int but would be serialised as true/false.
        if isinstance(threshold, bool) or not isinstance(threshold, (int, float)):
            return {
                "status": "error",
                "error": "Threshold must be a number between 0.1 and 1.0",
            }
        if not 0.1 <= threshold <= 1.0:
            return {"status": "error", "error": "Threshold must be between 0.1 and 1.0"}

        # Prepare request. Whitespace in the name (including newlines) is
        # collapsed so the FASTA header stays on a single line.
        protein_name = " ".join(str(arguments.get("name") or "query").split())
        if not protein_name:
            protein_name = "query"
        fasta_data = f">{protein_name}\n{sequence}"

        try:
            response = requests.post(
                f"{self.BASE_URL}/api/create",
                json={
                    "version": self.API_VERSION,
                    "data_format": "fasta",
                    "data": fasta_data,
                    "threshold": threshold,
                },
                headers={"Content-Type": "application/json"},
                timeout=self.REQUEST_TIMEOUT,  # Longer timeout for prediction
            )
            response.raise_for_status()
            data = response.json()

            # Extract predictions
            predictions = data.get("predictions", [])
            if not predictions:
                return {
                    "status": "success",
                    "data": {
                        "sequence": sequence[:50] + "..."
                        if len(sequence) > 50
                        else sequence,
                        "predictions": [],
                        "threshold": threshold,
                    },
                    "message": "No predictions above threshold",
                }

            # Format results; only the first prediction entry is used.
            pred = predictions[0]
            functions = pred.get("functions", [])

            result = {
                "sequence_name": pred.get("protein_info", protein_name),
                "sequence_length": len(sequence),
                "threshold": threshold,
                "predictions": {},
            }

            for category in functions:
                cat_name = category.get("name", "Unknown")
                cat_functions = category.get("functions", [])
                # Format: [GO_ID, name, score]
                result["predictions"][cat_name] = [
                    {"go_id": f[0], "name": f[1], "score": round(f[2], 4)}
                    for f in cat_functions
                ]

            return {"status": "success", "data": result, "uuid": data.get("uuid")}

        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"Request timed out after {self.REQUEST_TIMEOUT}s",
            }
        except requests.exceptions.HTTPError as e:
            # Guard against an HTTPError that carries no response object.
            status_code = e.response.status_code if e.response is not None else "unknown"
            return {"status": "error", "error": f"HTTP error: {status_code}"}
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Request failed: {str(e)}"}
        except Exception as e:
            # Boundary handler: an unexpected response shape is reported to
            # the caller as an error dictionary instead of being raised.
            return {"status": "error", "error": f"Error: {str(e)}"}