Source code for tooluniverse.oma_tool
# oma_tool.py
"""
OMA (Orthologous MAtrix) Browser API tool for ToolUniverse.
OMA is a comprehensive database of orthologs among complete genomes.
It provides orthology predictions using a rigorous algorithm applied to
2,600+ genomes. OMA offers protein lookup, pairwise orthologs,
Hierarchical Orthologous Groups (HOGs), and OMA Groups.
API: https://omabrowser.org/api/
No authentication required. Free public access.
"""
from typing import Dict, Any
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL of the OMA Browser REST API (no trailing slash); endpoint paths
# such as "/protein/<id>/" are appended to it when building request URLs.
OMA_BASE_URL = "https://omabrowser.org/api"
[docs]
@register_tool("OMATool")
class OMATool(BaseTool):
    """
    Tool for querying the OMA Orthology Browser.

    OMA provides orthology relationships among 2,600+ complete genomes
    using a highly reliable algorithm. Supports protein lookup, pairwise
    orthologs, Hierarchical Orthologous Groups (HOGs), and OMA Groups.
    No authentication required.

    The endpoint served by an instance is fixed at construction time from
    ``tool_config["fields"]["endpoint"]`` and is one of ``"protein"``
    (the default), ``"orthologs"``, ``"hog"`` or ``"group"``.

    ``run`` always returns a dictionary: ``{"data": ..., "metadata": {...}}``
    on success, or ``{"error": "<message>"}`` on failure.
    """

    # Default and upper bound for the ``per_page`` argument of the
    # orthologs endpoint.
    _DEFAULT_PER_PAGE = 20
    _MAX_PER_PAGE = 100

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout and the endpoint this tool instance serves.

        Args:
            tool_config: Tool configuration. ``timeout`` (seconds, default 30)
                and ``fields.endpoint`` (default ``"protein"``) are read here;
                the full mapping is also passed to ``BaseTool.__init__``.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``or {}`` also covers an explicit ``"fields": null`` in the config.
        fields = tool_config.get("fields") or {}
        self.endpoint = fields.get("endpoint", "protein")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute the OMA API call.

        Args:
            arguments: Call arguments; which keys are read depends on the
                configured endpoint (``protein_id``, ``hog_id``, ``group_id``,
                ``rel_type``, ``per_page``).

        Returns:
            The endpoint result, or ``{"error": ...}``. Network, HTTP and
            unexpected failures are converted to an error dictionary rather
            than raised.
        """
        try:
            return self._query(arguments or {})
        except requests.exceptions.Timeout:
            return {"error": f"OMA API request timed out after {self.timeout} seconds"}
        except requests.exceptions.ConnectionError:
            return {
                "error": "Failed to connect to OMA API. Check network connectivity."
            }
        except requests.exceptions.HTTPError as e:
            # An HTTPError can be constructed without a response attached.
            status = getattr(e.response, "status_code", "unknown")
            return {"error": f"OMA API HTTP error: {status}"}
        except Exception as e:
            return {"error": f"Unexpected error querying OMA: {str(e)}"}

    def _query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Route to the handler for the configured OMA endpoint."""
        handlers = {
            "protein": self._get_protein,
            "orthologs": self._get_orthologs,
            "hog": self._get_hog,
            "group": self._get_group,
        }
        handler = handlers.get(self.endpoint)
        if handler is None:
            return {"error": f"Unknown endpoint: {self.endpoint}"}
        return handler(arguments)

    @staticmethod
    def _path_segment(identifier: Any) -> str:
        """Return *identifier* as a URL-safe path segment, or "" if missing.

        The value is stringified, stripped and percent-encoded so that
        characters such as ``/``, ``?`` or ``#`` cannot alter the request
        path or inject a query string. ``:`` is left literal so HOG IDs
        (``HOG:E0739094``) are sent exactly as before.
        """
        if not identifier:
            return ""
        return quote(str(identifier).strip(), safe=":")

    @staticmethod
    def _sub_dict(record: Dict[str, Any], key: str) -> Dict[str, Any]:
        """Return ``record[key]`` when it is a dict, otherwise an empty dict.

        Guards against the key being absent, JSON ``null`` or another type.
        """
        value = record.get(key)
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _dict_items(payload: Any) -> list:
        """Return the dict elements of *payload* if it is a list, else ``[]``."""
        if not isinstance(payload, list):
            return []
        return [item for item in payload if isinstance(item, dict)]

    @classmethod
    def _normalize_per_page(cls, value: Any) -> int:
        """Coerce *value* to an int page size within 1.._MAX_PER_PAGE.

        Values that cannot be converted (``None``, non-numeric strings,
        infinities) fall back to the default page size.
        """
        try:
            per_page = int(value)
        except (TypeError, ValueError, OverflowError):
            return cls._DEFAULT_PER_PAGE
        return max(1, min(per_page, cls._MAX_PER_PAGE))

    def _fetch_json(self, path: str, params: Any = None) -> Any:
        """GET ``{OMA_BASE_URL}/{path}/`` and return the decoded JSON body.

        Raises:
            requests.exceptions.RequestException: on timeout, connection
                failure or a non-2xx status (via ``raise_for_status``).
            ValueError: if the body is not valid JSON.
        """
        url = f"{OMA_BASE_URL}/{path}/"
        response = requests.get(url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def _get_protein(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get protein information by UniProt accession or OMA ID.

        Args:
            arguments: Must contain ``protein_id``.

        Returns:
            ``{"data": {...protein fields...}, "metadata": {...}}`` or an
            error dictionary when ``protein_id`` is missing or the response
            is not a JSON object.
        """
        protein_id = arguments.get("protein_id", "")
        segment = self._path_segment(protein_id)
        if not segment:
            return {
                "error": "protein_id parameter is required (UniProt accession e.g. P04637, or OMA ID e.g. HUMAN31534)"
            }

        data = self._fetch_json(f"protein/{segment}")
        if not isinstance(data, dict):
            return {"error": "Unexpected response format from OMA API"}

        species = self._sub_dict(data, "species")
        locus = self._sub_dict(data, "locus")
        result = {
            "entry_nr": data.get("entry_nr"),
            "oma_id": data.get("omaid"),
            "canonical_id": data.get("canonicalid"),
            "sequence_length": data.get("sequence_length"),
            "species_code": species.get("code"),
            "species_name": species.get("species"),
            "taxon_id": species.get("taxon_id"),
            "oma_group": data.get("oma_group"),
            "oma_hog_id": data.get("oma_hog_id"),
            "chromosome": data.get("chromosome"),
            "locus_start": locus.get("start"),
            "locus_end": locus.get("end"),
            "locus_strand": locus.get("strand"),
            "is_main_isoform": data.get("is_main_isoform"),
            "roothog_id": data.get("roothog_id"),
        }
        return {
            "data": result,
            "metadata": {
                "source": "OMA Browser",
                "query": protein_id,
            },
        }

    def _get_orthologs(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get pairwise orthologs for a protein.

        Args:
            arguments: Must contain ``protein_id``. Optional ``rel_type`` is
                forwarded as a query parameter when truthy; optional
                ``per_page`` (default 20) is clamped to 1..100.

        Returns:
            ``{"data": [...ortholog records...], "metadata": {...}}`` or an
            error dictionary when ``protein_id`` is missing.
        """
        protein_id = arguments.get("protein_id", "")
        segment = self._path_segment(protein_id)
        if not segment:
            return {
                "error": "protein_id parameter is required (UniProt accession e.g. P04637)"
            }

        per_page = arguments.get("per_page", self._DEFAULT_PER_PAGE)
        params = {"per_page": self._normalize_per_page(per_page)}
        rel_type = arguments.get("rel_type")
        if rel_type:
            params["rel_type"] = rel_type

        data = self._fetch_json(f"protein/{segment}/orthologs", params=params)

        results = []
        for orth in self._dict_items(data):
            species = self._sub_dict(orth, "species")
            results.append(
                {
                    "oma_id": orth.get("omaid"),
                    "canonical_id": orth.get("canonicalid"),
                    "species_name": species.get("species"),
                    "species_code": species.get("code"),
                    "taxon_id": species.get("taxon_id"),
                    "rel_type": orth.get("rel_type"),
                    "distance": orth.get("distance"),
                    "score": orth.get("score"),
                    "sequence_length": orth.get("sequence_length"),
                    "chromosome": orth.get("chromosome"),
                }
            )
        return {
            "data": results,
            "metadata": {
                "source": "OMA Browser",
                "query": protein_id,
                "total_orthologs": len(results),
            },
        }

    def _get_hog(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get Hierarchical Orthologous Group (HOG) information.

        Args:
            arguments: Must contain ``hog_id``.

        Returns:
            ``{"data": [...HOG records...], "metadata": {...}}`` or an error
            dictionary when ``hog_id`` is missing. Each record keeps at most
            10 child HOGs (each with at most 5 alternative levels) and at
            most 10 alternative levels.
        """
        hog_id = arguments.get("hog_id", "")
        segment = self._path_segment(hog_id)
        if not segment:
            return {"error": "hog_id parameter is required (e.g. HOG:E0739094)"}

        data = self._fetch_json(f"hog/{segment}")
        # Treat a single JSON object as a one-element list instead of
        # iterating over its keys.
        if isinstance(data, dict):
            data = [data]

        results = []
        for hog in self._dict_items(data):
            # Truncate before building so at most 10 children are processed.
            children = [
                {
                    "hog_id": child.get("hog_id"),
                    "alternative_levels": (child.get("alternative_levels") or [])[:5],
                }
                for child in self._dict_items(hog.get("children_hogs"))[:10]
            ]
            results.append(
                {
                    "hog_id": hog.get("hog_id"),
                    "level": hog.get("level"),
                    "roothog_id": hog.get("roothog_id"),
                    "completeness_score": hog.get("completeness_score"),
                    "description": hog.get("description"),
                    "parent_hogs": hog.get("parent_hogs", []),
                    "children_hogs": children,
                    "alternative_levels": (hog.get("alternative_levels") or [])[:10],
                }
            )
        return {
            "data": results,
            "metadata": {
                "source": "OMA Browser",
                "query": hog_id,
                "total_entries": len(results),
            },
        }

    def _get_group(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Get OMA Group details (strict 1:1 orthologs across all genomes).

        Args:
            arguments: Must contain ``group_id``.

        Returns:
            ``{"data": {...group fields, "members": [...]}, "metadata": {...}}``
            or an error dictionary when ``group_id`` is missing or the
            response is not a JSON object. At most 30 members are returned;
            ``metadata["total_members"]`` reports the full count.
        """
        group_id = arguments.get("group_id", "")
        segment = self._path_segment(group_id)
        if not segment:
            return {
                "error": "group_id parameter is required (numeric group ID, e.g. 1388790)"
            }

        data = self._fetch_json(f"group/{segment}")
        if not isinstance(data, dict):
            return {"error": "Unexpected response format from OMA API"}

        all_members = data.get("members")
        if not isinstance(all_members, list):
            all_members = []

        members = []
        for m in self._dict_items(all_members)[:30]:  # Limit members
            species = self._sub_dict(m, "species")
            members.append(
                {
                    "oma_id": m.get("omaid"),
                    "canonical_id": m.get("canonicalid"),
                    "species_name": species.get("species"),
                    "species_code": species.get("code"),
                    "taxon_id": species.get("taxon_id"),
                    "sequence_length": m.get("sequence_length"),
                    "chromosome": m.get("chromosome"),
                }
            )
        result = {
            "group_nr": data.get("group_nr"),
            "fingerprint": data.get("fingerprint"),
            "description": data.get("description"),
            "members": members,
        }
        return {
            "data": result,
            "metadata": {
                "source": "OMA Browser",
                "query": str(group_id),
                "total_members": len(all_members),
                "returned_members": len(members),
            },
        }