Source code for tooluniverse.imgt_tool
"""
IMGT (International ImMunoGeneTics Information System) tool for ToolUniverse.
IMGT is the international reference for immunoglobulin (IG), T cell receptor (TR),
and MHC/HLA gene sequences.
Website: https://www.imgt.org/
Uses DBFetch for sequence retrieval where available.
"""
from typing import Dict, Any, Optional, List
from urllib.parse import quote_plus

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# IMGT related URLs
# Root of the IMGT website; used to build the reference/search links that the
# tool returns to callers.
IMGT_BASE_URL = "https://www.imgt.org"
# EBI DBFetch endpoint that is queried over HTTP for sequence retrieval.
EBI_DBFETCH_URL = "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch"
[docs]
@register_tool("IMGTTool")
class IMGTTool(BaseTool):
    """
    Tool for accessing IMGT immunoglobulin/TCR data.

    IMGT provides:
    - Immunoglobulin gene sequences
    - T cell receptor sequences
    - MHC/HLA sequences
    - Germline gene assignments

    Sequence retrieval goes through EBI DBFetch (no authentication required).
    The ``search_genes`` and ``get_gene_info`` operations perform no network
    request: they return static reference data and links into the IMGT
    website.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Initialise the tool from its configuration.

        Args:
            tool_config: Tool configuration dict. The optional ``timeout``
                key (default 30) is passed as the ``timeout`` of each HTTP
                request; the optional ``parameter`` key (default ``{}``) is
                stored unchanged on the instance.
        """
        super().__init__(tool_config)
        self.timeout: int = tool_config.get("timeout", 30)
        self.parameter = tool_config.get("parameter", {})

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute an IMGT query based on the ``operation`` argument.

        Args:
            arguments: Dict whose ``operation`` key selects the handler
                (``get_sequence``, ``search_genes`` or ``get_gene_info``).
                The same dict is forwarded to the handler.

        Returns:
            The handler's result dict, or a ``{"status": "error", ...}`` dict
            when the operation is missing or not recognised.
        """
        operation = arguments.get("operation", "")
        if operation == "get_sequence":
            return self._get_sequence(arguments)
        if operation == "search_genes":
            return self._search_genes(arguments)
        if operation == "get_gene_info":
            return self._get_gene_info(arguments)
        return {
            "status": "error",
            "error": f"Unknown operation: {operation}. Supported: get_sequence, search_genes, get_gene_info",
        }

    def _fetch_dbfetch(self, db: str, accession: str, fmt: str) -> "requests.Response":
        """Issue one raw-style EBI DBFetch GET for ``accession`` in database ``db``."""
        return requests.get(
            EBI_DBFETCH_URL,
            params={
                "db": db,
                "id": accession,
                "format": fmt,
                "style": "raw",
            },
            timeout=self.timeout,
            headers={"User-Agent": "ToolUniverse/IMGT"},
        )

    def _get_sequence(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get immunoglobulin/TCR sequence by accession.

        The ``imgt`` DBFetch database is queried first. If that response is a
        404, or its body contains "not found" (case-insensitive), the same
        accession is retried against the ``embl`` database.

        Args:
            arguments: Dict containing:
                - accession: IMGT/LIGM-DB accession or EMBL/GenBank accession
                - format: Output format (fasta, embl). Default: fasta

        Returns:
            On success, a dict with ``status`` "success", the raw response
            text under ``data["sequence"]`` and a ``metadata`` section.
            Otherwise a dict with ``status`` "error" and an ``error`` message
            (missing accession, not found, HTTP/network failure, or any other
            unexpected exception). No exception is propagated to the caller.
        """
        accession = arguments.get("accession", "")
        if not accession:
            return {"status": "error", "error": "Missing required parameter: accession"}
        fmt = arguments.get("format", "fasta")

        try:
            # Use EBI DBFetch to retrieve IMGT sequences
            response = self._fetch_dbfetch("imgt", accession, fmt)
            if response.status_code == 404 or "not found" in response.text.lower():
                # Try EMBL database as fallback
                response = self._fetch_dbfetch("embl", accession, fmt)
                if response.status_code == 404:
                    return {"status": "error", "error": f"Sequence not found: {accession}"}
            # Any remaining non-2xx status becomes a RequestException below.
            response.raise_for_status()
            return {
                "status": "success",
                "data": {
                    "accession": accession,
                    "format": fmt,
                    "sequence": response.text,
                },
                "metadata": {
                    "source": "IMGT via EBI DBFetch",
                    "accession": accession,
                },
            }
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Request failed: {str(e)}"}
        except Exception as e:
            # Tool boundary: report unexpected failures as an error dict
            # instead of raising into the caller.
            return {"status": "error", "error": f"Unexpected error: {str(e)}"}

    def _search_genes(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Build search guidance for IMGT immunoglobulin/TCR genes.

        No request is sent; the result contains a GENE-DB search URL and
        reference data for the caller to follow up on the IMGT website.

        Args:
            arguments: Dict containing:
                - query: Search query (gene name, species). Echoed back in
                  the result; it is not part of the generated URL.
                - gene_type: Gene type filter (IGHV, IGKV, IGLV, TRAV, TRBV,
                  etc.). Reported as "all" when empty or missing.
                - species: Species filter (e.g., Homo sapiens). Defaults to
                  "Homo sapiens" when missing or None.

        Returns:
            Dict with ``status`` "success", the guidance under ``data`` and a
            ``metadata`` note.
        """
        query = arguments.get("query", "")
        gene_type = arguments.get("gene_type", "")
        if gene_type is None:
            gene_type = ""
        species = arguments.get("species")
        if species is None:
            species = "Homo sapiens"

        # Percent-encode the user-supplied values so characters such as '&',
        # '#' or '/' cannot corrupt the query string. Spaces still become '+',
        # so ordinary inputs produce the same URL as plain substitution.
        encoded_gene_type = quote_plus(str(gene_type))
        encoded_species = quote_plus(str(species))

        # Build search guidance
        search_info = {
            "query": query,
            "gene_type": gene_type if gene_type else "all",
            "species": species,
            "search_url": f"{IMGT_BASE_URL}/IMGT_GENE-DB/GENElect?query=2+{encoded_gene_type}&species={encoded_species}",
            "reference_url": f"{IMGT_BASE_URL}/IMGTrepertoire/",
            "gene_types": {
                "IGHV": "Immunoglobulin heavy chain variable",
                "IGHD": "Immunoglobulin heavy chain diversity",
                "IGHJ": "Immunoglobulin heavy chain joining",
                "IGKV": "Immunoglobulin kappa chain variable",
                "IGLV": "Immunoglobulin lambda chain variable",
                "TRAV": "T cell receptor alpha chain variable",
                "TRBV": "T cell receptor beta chain variable",
            },
        }
        return {
            "status": "success",
            "data": search_info,
            "metadata": {
                "source": "IMGT",
                "note": "Use the provided URLs for detailed gene search. IMGT web interface required for full search.",
            },
        }

    def _get_gene_info(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get information about IMGT gene nomenclature and databases.

        Returns a fixed description of the IMGT databases, gene nomenclature,
        analysis tools and entry-point URLs; no request is sent.

        Args:
            arguments: Dict (no required parameters; the contents are not
                read).

        Returns:
            Dict with ``status`` "success", the reference data under ``data``
            and a ``metadata`` section naming the source.
        """
        gene_info = {
            "databases": {
                "IMGT/LIGM-DB": "Annotated IG/TR sequences from EMBL/GenBank/DDBJ",
                "IMGT/GENE-DB": "Human and mouse IG/TR gene reference",
                "IMGT/3Dstructure-DB": "3D structures of IG, TR, MHC",
            },
            "gene_nomenclature": {
                "description": "IMGT unique gene nomenclature",
                "format": "[LOCUS][GROUP][SUBGROUP]*[ALLELE]",
                "example": "IGHV1-2*01",
                "components": {
                    "LOCUS": "IG (immunoglobulin) or TR (T cell receptor)",
                    "CHAIN": "H (heavy), K (kappa), L (lambda), A (alpha), B (beta)",
                    "REGION": "V (variable), D (diversity), J (joining), C (constant)",
                },
            },
            "tools": {
                "IMGT/V-QUEST": "Sequence alignment to germline V genes",
                "IMGT/HighV-QUEST": "High-throughput sequence analysis",
                "IMGT/DomainGapAlign": "Domain annotation",
            },
            "urls": {
                "main": IMGT_BASE_URL,
                "gene_db": f"{IMGT_BASE_URL}/IMGT_GENE-DB/",
                "ligm_db": f"{IMGT_BASE_URL}/ligmdb/",
                "vquest": f"{IMGT_BASE_URL}/IMGT_vquest/",
            },
        }
        return {
            "status": "success",
            "data": gene_info,
            "metadata": {
                "source": "IMGT",
            },
        }