Source code for tooluniverse.mirna_tool
# mirna_tool.py
"""
miRNA/lncRNA tools for ToolUniverse via RNAcentral and EBI Search APIs.
Provides miRNA and lncRNA search, sequence retrieval, cross-references, and
publication data by leveraging RNAcentral (the comprehensive ncRNA aggregator)
and EBI Search (for text-based queries with faceted filtering).
miRBase and LNCipedia data is aggregated through RNAcentral, which provides
a unified REST API for accessing ncRNA data from 50+ databases.
APIs used:
- RNAcentral REST API: https://rnacentral.org/api/v1/
- EBI Search: https://www.ebi.ac.uk/ebisearch/ws/rest/rnacentral
No authentication required. Free public access.
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
RNACENTRAL_BASE = "https://rnacentral.org/api/v1"
EBI_SEARCH_BASE = "https://www.ebi.ac.uk/ebisearch/ws/rest/rnacentral"
# Standard EBI search fields
EBI_FIELDS = "description,rna_type,length,expert_db,has_secondary_structure,has_genomic_coordinates"
# Common species name to NCBI Taxonomy ID mapping
SPECIES_TAXID = {
"homo sapiens": "9606",
"human": "9606",
"mus musculus": "10090",
"mouse": "10090",
"rattus norvegicus": "10116",
"rat": "10116",
"drosophila melanogaster": "7227",
"fruit fly": "7227",
"caenorhabditis elegans": "6239",
"danio rerio": "7955",
"zebrafish": "7955",
"arabidopsis thaliana": "3702",
"saccharomyces cerevisiae": "4932",
"gallus gallus": "9031",
"chicken": "9031",
"bos taurus": "9913",
"sus scrofa": "9823",
"xenopus tropicalis": "8364",
}
[docs]
class miRNASearchTool(BaseTool):
"""
Search for miRNAs/lncRNAs/ncRNAs via EBI Search of RNAcentral.
Can filter results by RNA type for focused results.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
super().__init__(tool_config)
self.timeout = tool_config.get("timeout", 30)
# Get default RNA type filter from tool config fields
fields = tool_config.get("fields", {})
self.default_rna_type = fields.get("rna_type_filter", "")
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
try:
query = arguments.get("query", "")
# User-supplied rna_type overrides config default
rna_type_filter = arguments.get("rna_type", "") or self.default_rna_type
species = arguments.get("species", "")
size = int(arguments.get("size", 10))
# Build EBI search query with optional filters
search_parts = [query]
if rna_type_filter:
search_parts.append(f"rna_type:{rna_type_filter}")
if species:
# Map species name to taxonomy ID for reliable filtering
taxid = SPECIES_TAXID.get(species.lower(), "")
if taxid:
search_parts.append(f"TAXONOMY:{taxid}")
else:
# Try using the species name directly (works for exact matches)
search_parts.append(f"TAXONOMY:{species.replace(' ', '_')}")
search_query = " AND ".join(search_parts)
params = {
"query": search_query,
"format": "json",
"size": min(size, 100),
"fields": EBI_FIELDS,
}
resp = requests.get(EBI_SEARCH_BASE, params=params, timeout=self.timeout)
resp.raise_for_status()
result = resp.json()
entries = []
for entry in result.get("entries", []):
fields = entry.get("fields", {})
entries.append(
{
"rnacentral_id": entry.get("id", ""),
"description": (
fields.get("description", [""])[0]
if fields.get("description")
else ""
),
"rna_type": (
fields.get("rna_type", [""])[0]
if fields.get("rna_type")
else ""
),
"length": int(fields.get("length", ["0"])[0])
if fields.get("length")
else 0,
"expert_databases": fields.get("expert_db", []),
"has_secondary_structure": (
fields.get("has_secondary_structure", ["False"])[0]
== "True"
if fields.get("has_secondary_structure")
else False
),
"has_genomic_coordinates": (
fields.get("has_genomic_coordinates", ["False"])[0]
== "True"
if fields.get("has_genomic_coordinates")
else False
),
}
)
return {
"data": {
"total_hits": result.get("hitCount", 0),
"returned": len(entries),
"entries": entries,
},
"metadata": {
"source": "EBI Search / RNAcentral",
"query": search_query,
},
}
except requests.exceptions.Timeout:
return {"error": f"EBI Search API timed out after {self.timeout}s"}
except requests.exceptions.ConnectionError:
return {"error": "Failed to connect to EBI Search API"}
except requests.exceptions.HTTPError as e:
code = e.response.status_code if e.response is not None else "unknown"
return {"error": f"EBI Search API HTTP error: {code}"}
except Exception as e:
return {"error": f"miRNA search failed: {str(e)}"}
[docs]
class miRNAGetTool(BaseTool):
"""
Get detailed miRNA/lncRNA information from RNAcentral by ID.
Returns sequence, species, RNA type, and database cross-references.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
super().__init__(tool_config)
self.timeout = tool_config.get("timeout", 30)
fields = tool_config.get("fields", {})
self.endpoint = fields.get("endpoint", "get_rna")
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
try:
endpoint = self.endpoint
if endpoint == "get_rna":
return self._get_rna(arguments)
elif endpoint == "get_publications":
return self._get_publications(arguments)
elif endpoint == "get_xrefs":
return self._get_xrefs(arguments)
else:
return {"error": f"Unknown endpoint: {endpoint}"}
except requests.exceptions.Timeout:
return {"error": f"RNAcentral API timed out after {self.timeout}s"}
except requests.exceptions.ConnectionError:
return {"error": "Failed to connect to RNAcentral API"}
except requests.exceptions.HTTPError as e:
code = e.response.status_code if e.response is not None else "unknown"
if code == 404:
return {
"error": f"RNA entry not found: {arguments.get('rnacentral_id', '')}"
}
return {"error": f"RNAcentral API HTTP error: {code}"}
except Exception as e:
return {"error": f"RNAcentral query failed: {str(e)}"}
[docs]
def _get_rna(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get RNA entry details including sequence."""
rnacentral_id = arguments.get("rnacentral_id", "")
taxid = arguments.get("taxid")
# Build URL: /api/v1/rna/{URS}/{taxid} or /api/v1/rna/{URS}
if taxid:
url = f"{RNACENTRAL_BASE}/rna/{rnacentral_id}/{taxid}?format=json"
else:
url = f"{RNACENTRAL_BASE}/rna/{rnacentral_id}?format=json"
resp = requests.get(url, timeout=self.timeout)
resp.raise_for_status()
result = resp.json()
return {
"data": {
"rnacentral_id": result.get("rnacentral_id", ""),
"description": result.get("description", ""),
"short_description": result.get("short_description", ""),
"sequence": result.get("sequence", ""),
"length": result.get("length", 0),
"rna_type": result.get("rna_type", ""),
"species": result.get("species", ""),
"taxid": result.get("taxid"),
"genes": result.get("genes", []),
"publications_count": result.get("publications", 0),
"is_active": result.get("is_active", True),
"distinct_databases": result.get("distinct_databases", ""),
},
"metadata": {
"source": "RNAcentral",
"url": f"https://rnacentral.org/rna/{rnacentral_id}",
},
}
[docs]
def _get_publications(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get publications associated with an RNA entry."""
rnacentral_id = arguments.get("rnacentral_id", "")
page_size = int(arguments.get("page_size", 10))
url = (
f"{RNACENTRAL_BASE}/rna/{rnacentral_id}/publications"
f"?format=json&page_size={min(page_size, 50)}"
)
resp = requests.get(url, timeout=self.timeout)
resp.raise_for_status()
result = resp.json()
publications = []
for pub in result.get("results", []):
publications.append(
{
"title": pub.get("title", ""),
"authors": pub.get("authors", []),
"publication": pub.get("publication", ""),
"pubmed_id": pub.get("pubmed_id", ""),
"doi": pub.get("doi", ""),
"expert_db": pub.get("expert_db", False),
}
)
return {
"data": {
"total_publications": result.get("count", 0),
"returned": len(publications),
"publications": publications,
},
"metadata": {
"source": "RNAcentral",
"rnacentral_id": rnacentral_id,
},
}
[docs]
def _get_xrefs(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get cross-references for an RNA entry."""
rnacentral_id = arguments.get("rnacentral_id", "")
page_size = int(arguments.get("page_size", 10))
url = (
f"{RNACENTRAL_BASE}/rna/{rnacentral_id}/xrefs"
f"?format=json&page_size={min(page_size, 50)}"
)
resp = requests.get(url, timeout=self.timeout)
resp.raise_for_status()
result = resp.json()
xrefs = []
for xref in result.get("results", []):
acc = xref.get("accession", {})
xrefs.append(
{
"database": xref.get("database", ""),
"accession_id": acc.get("id", ""),
"external_id": acc.get("external_id", ""),
"description": acc.get("description", ""),
"species": acc.get("species", ""),
"rna_type": acc.get("rna_type", ""),
"gene": acc.get("gene", ""),
"taxid": xref.get("taxid"),
"is_active": xref.get("is_active", True),
"expert_db_url": acc.get("expert_db_url", ""),
"mirbase_mature_products": xref.get("mirbase_mature_products"),
"mirbase_precursor": xref.get("mirbase_precursor"),
}
)
return {
"data": {
"total_xrefs": result.get("count", 0),
"returned": len(xrefs),
"cross_references": xrefs,
},
"metadata": {
"source": "RNAcentral",
"rnacentral_id": rnacentral_id,
},
}