# Source code for tooluniverse.hocomoco_tool
"""
HOCOMOCO v14 Tool - Transcription Factor Binding Motifs
Provides access to the HOCOMOCO (HOmo sapiens COmprehensive MOdel COllection)
database for transcription factor binding motif data. HOCOMOCO contains
high-quality TF binding models derived from ChIP-Seq data for human and mouse.
API base: https://hocomoco14.autosome.org
No authentication required.
Reference: Kulakovskiy et al., Nucl. Acids Res. 2018
"""
import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool
# Root URL of the HOCOMOCO v14 web service; the request paths used in this
# module ("/search.json", "/motif/<id>.json", "/motif/<id>/pwm.json") are
# appended to it.
HOCOMOCO_BASE = "https://hocomoco14.autosome.org"
[文档]
@register_tool("HocomocoTool")
class HocomocoTool(BaseTool):
    """
    Tool for querying HOCOMOCO v14 transcription factor binding motif database.

    The operation executed by ``run`` is chosen by the ``endpoint_type`` entry
    of the ``fields`` mapping in the tool configuration; it defaults to
    ``"search_motifs"`` when that entry is absent.

    Supported operations:
    - search_motifs: Search for TF motifs by gene/protein name
    - get_motif: Get detailed motif information including PWM and quality
    """
[文档]
def __init__(self, tool_config: Dict[str, Any]):
    """Initialise the tool from its configuration mapping.

    Args:
        tool_config: Tool configuration. It is forwarded to the parent
            initialiser, and its optional ``fields`` mapping may carry an
            ``endpoint_type`` entry selecting the operation
            (``"search_motifs"`` when absent).
    """
    super().__init__(tool_config)
    # Passed as ``timeout=`` to every ``requests.get`` call made by this tool.
    self.timeout = 30
    field_settings = tool_config.get("fields", {})
    self.endpoint_type = field_settings.get("endpoint_type", "search_motifs")
[文档]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Run the operation selected by ``self.endpoint_type``.

    Args:
        arguments: Call parameters, passed unchanged to the selected handler.

    Returns:
        The handler's result dict, or ``{"status": "error", "error": ...}``
        when the endpoint type is unknown or the handler raises. No
        exception derived from ``Exception`` escapes this method.
    """
    try:
        endpoint = self.endpoint_type
        if endpoint == "search_motifs":
            return self._search_motifs(arguments)
        if endpoint == "get_motif":
            return self._get_motif(arguments)
        return {
            "status": "error",
            "error": f"Unknown endpoint type: {endpoint}",
        }
    except requests.exceptions.Timeout:
        failure = "HOCOMOCO API request timed out"
    except requests.exceptions.ConnectionError:
        failure = "Failed to connect to HOCOMOCO API"
    except Exception as exc:
        # Catch-all boundary: any other failure is reported, not raised.
        failure = f"HOCOMOCO API error: {exc!s}"
    return {"status": "error", "error": failure}
[文档]
def _search_motifs(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Search for TF motifs by gene/protein name."""
query = arguments.get("query") or arguments.get("gene_name", "")
if not query:
return {"status": "error", "error": "Missing required parameter: query"}
url = f"{HOCOMOCO_BASE}/search.json"
resp = requests.get(url, params={"query": query}, timeout=self.timeout)
resp.raise_for_status()
motif_ids = resp.json()
if not motif_ids:
return {
"status": "success",
"data": [],
"metadata": {"query": query, "total": 0},
}
# Fetch basic info for each motif
results = []
for motif_id in motif_ids[:10]: # Limit to 10 results
detail = self._fetch_motif_summary(motif_id)
if detail:
results.append(detail)
return {
"status": "success",
"data": results,
"metadata": {
"query": query,
"total": len(motif_ids),
"returned": len(results),
},
}
[文档]
def _fetch_motif_summary(self, motif_id: str) -> Dict[str, Any] | None:
"""Fetch summary info for a single motif."""
try:
url = f"{HOCOMOCO_BASE}/motif/{motif_id}.json"
resp = requests.get(url, timeout=self.timeout)
if resp.status_code != 200:
return None
data = resp.json()
return {
"motif_id": data.get("full_name"),
"gene_name_human": data.get("gene_name_human"),
"gene_name_mouse": data.get("gene_name_mouse"),
"quality": data.get("quality"),
"consensus": data.get("consensus"),
"model_length": data.get("model_length"),
"uniprot_ac_human": data.get("uniprot_ac_human"),
"uniprot_ac_mouse": data.get("uniprot_ac_mouse"),
"tfclass": data.get("tfclass"),
}
except Exception:
return None
[文档]
def _get_motif(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Get detailed motif information including PWM."""
motif_id = arguments.get("motif_id", "")
if not motif_id:
return {"status": "error", "error": "Missing required parameter: motif_id"}
# Fetch motif detail
url = f"{HOCOMOCO_BASE}/motif/{motif_id}.json"
resp = requests.get(url, timeout=self.timeout)
if resp.status_code == 404:
return {"status": "error", "error": f"Motif not found: {motif_id}"}
resp.raise_for_status()
data = resp.json()
include_pwm = arguments.get("include_pwm", False)
pwm = None
if include_pwm:
pwm_url = f"{HOCOMOCO_BASE}/motif/{motif_id}/pwm.json"
pwm_resp = requests.get(pwm_url, timeout=self.timeout)
if pwm_resp.status_code == 200:
pwm = pwm_resp.json()
result = {
"motif_id": data.get("full_name"),
"gene_name_human": data.get("gene_name_human"),
"gene_name_mouse": data.get("gene_name_mouse"),
"gene_synonyms_human": data.get("gene_synonyms_human"),
"gene_synonyms_mouse": data.get("gene_synonyms_mouse"),
"quality": data.get("quality"),
"consensus": data.get("consensus"),
"model_length": data.get("model_length"),
"data_sources": data.get("data_sources"),
"motif_subtype": data.get("motif_subtype"),
"uniprot_id_human": data.get("uniprot_id_human"),
"uniprot_id_mouse": data.get("uniprot_id_mouse"),
"uniprot_ac_human": data.get("uniprot_ac_human"),
"uniprot_ac_mouse": data.get("uniprot_ac_mouse"),
"hgnc_ids": data.get("hgnc_ids"),
"entrezgene_ids_human": data.get("entrezgene_ids_human"),
"entrezgene_ids_mouse": data.get("entrezgene_ids_mouse"),
"tfclass": data.get("tfclass"),
"motif_cluster": data.get("motif_cluster"),
"quality_metrics": data.get("quality_metrics"),
"previous_names": data.get("previous_names"),
"retracted": data.get("retracted"),
}
if pwm is not None:
result["pwm"] = pwm
return {"status": "success", "data": result}