# Source code for tooluniverse.lipidmaps_gene_tool
# lipidmaps_gene_tool.py
"""
LIPID MAPS Proteome Database (LMPD) gene/protein REST tool for ToolUniverse.
The existing ``LipidMapsTool`` only exposes the LIPID MAPS *compound*
(structure) context. LIPID MAPS also serves a curated proteome database
(LMPD) that links lipid-metabolism **genes** and **proteins/enzymes** to the
resource. This tool wraps those gene and protein contexts, which were
previously unreachable as ToolUniverse tools.
It lets you resolve a lipid-related enzyme/gene from a gene symbol, NCBI Gene
ID, UniProt accession, RefSeq protein id, or the LIPID MAPS protein id
(``lmp_id``) and retrieve its annotation (gene name, synonyms, chromosome,
map location, NCBI summary, species, and protein cross-references / sequence).
REST shape (no key):
GET /rest/{context}/{input_item}/{input_value}/{output_item}/json
e.g. https://www.lipidmaps.org/rest/protein/uniprot_id/P49327/all/json
API docs: https://lipidmaps.org/resources/rest
"""
from typing import Any, Dict
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Root of the LIPID MAPS REST service; request sub-paths are appended to it.
LIPIDMAPS_BASE_URL = "https://www.lipidmaps.org/rest"

# Headers sent with every request. LIPID MAPS is fronted by Cloudflare, which
# answers the stock python-requests User-Agent with a 403 "Just a moment..."
# challenge page; a browser-style User-Agent gets through on the plain REST
# endpoints.
_REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0.0.0 Safari/537.36 ToolUniverse/LipidMapsGene"
    ),
    "Accept": "application/json,text/plain,*/*",
}

# Lookup keys accepted by the LMPD gene route.
_GENE_INPUT_ITEMS = {
    "gene_id",
    "gene_name",
    "gene_symbol",
    "lmp_id",
}

# Lookup keys accepted by the LMPD protein route: every gene key plus the
# protein-specific identifiers.
_PROTEIN_INPUT_ITEMS = _GENE_INPUT_ITEMS | {
    "mrna_id",
    "protein_entry",
    "refseq_id",
    "uniprot_id",
}
@register_tool("LipidMapsGeneTool")
class LipidMapsGeneTool(BaseTool):
    """Query the LIPID MAPS Proteome Database (LMPD) gene/protein contexts.

    Configured per tool via ``fields.context`` ("gene" or "protein") and
    ``fields.input_item`` (the default lookup key). The lookup key may be
    overridden per call with the ``input_item`` argument.

    Every call returns a dict whose ``status`` is "success" or "error". On
    success ``data`` is a list of record dicts and ``metadata`` echoes the
    lookup and carries ``total_results``; on error ``error`` holds a message.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """Read the timeout and the default lookup settings from the config.

        Args:
            tool_config: Tool configuration. ``timeout`` (default 30) is
                passed to ``requests.get``; ``fields.context`` (default
                "protein") and ``fields.input_item`` (default "uniprot_id")
                select the REST context and the default lookup key.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``or {}`` also covers a config that sets "fields" to null.
        fields = tool_config.get("fields") or {}
        self.context = fields.get("context", "protein")
        self.input_item = fields.get("input_item", "uniprot_id")

    @staticmethod
    def _clean(value: Any) -> str:
        """Return *value* as a stripped string; ``None`` becomes ``""``."""
        return "" if value is None else str(value).strip()

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Execute an LMPD gene/protein lookup. Never raises.

        Args:
            arguments: Call arguments. ``input_value`` is required.
                ``input_item`` optionally overrides the configured lookup
                key, and ``output_item`` selects the output (default "all").

        Returns:
            The success dict built by ``_make_request``, or
            ``{"status": "error", "error": <message>}`` when validation or
            the HTTP request fails.
        """
        try:
            input_value = self._clean(arguments.get("input_value"))
            if not input_value:
                return {
                    "status": "error",
                    "error": "Parameter 'input_value' is required (e.g. a gene "
                    "symbol 'FASN', UniProt id 'P49327', or NCBI gene id '2194').",
                }

            # A blank or whitespace-only override falls back to the default
            # instead of producing an empty URL segment.
            output_item = self._clean(arguments.get("output_item") or "all") or "all"
            default_item = self._clean(self.input_item)
            input_item = (
                self._clean(arguments.get("input_item") or default_item)
                or default_item
            )

            allowed = (
                _GENE_INPUT_ITEMS if self.context == "gene" else _PROTEIN_INPUT_ITEMS
            )
            if input_item not in allowed:
                return {
                    "status": "error",
                    "error": f"Unsupported input_item '{input_item}' for the "
                    f"{self.context} context. Supported: " + ", ".join(sorted(allowed)),
                }

            # Percent-encode the free-text segments so that '/', '?' or '#'
            # inside a value cannot alter the route, start a query string or
            # truncate the URL. ``input_item`` was just checked against the
            # allow-list and needs no encoding. Commas are kept in
            # ``output_item`` so a comma-separated value is sent unchanged.
            sub_path = (
                f"{self.context}/{input_item}/{quote(input_value, safe='')}"
                f"/{quote(output_item, safe=',')}/json"
            )
            # The metadata reports the value as the caller supplied it, not
            # its encoded form.
            return self._make_request(sub_path, input_item, input_value)
        except requests.exceptions.Timeout:
            return {
                "status": "error",
                "error": f"LIPID MAPS request timed out after {self.timeout}s.",
            }
        except requests.exceptions.ConnectionError:
            return {
                "status": "error",
                "error": "Failed to connect to LIPID MAPS. Check network connectivity.",
            }
        except requests.exceptions.RequestException as e:
            return {
                "status": "error",
                "error": f"LIPID MAPS request failed: {str(e)}",
            }
        except Exception as e:  # noqa: BLE001 - run() must never raise
            return {
                "status": "error",
                "error": f"Unexpected error querying LIPID MAPS LMPD: {str(e)}",
            }

    def _make_request(
        self, sub_path: str, input_item: str, input_value: str
    ) -> Dict[str, Any]:
        """Fetch and normalize an LMPD response.

        LIPID MAPS returns:
        - a flat JSON object for a single match,
        - a {"Row1": {...}, "Row2": {...}} keyed dict for multiple matches,
        - an empty list ``[]`` (or empty body / "null") for no match.

        Always returns ``data`` as a list of records for a consistent shape.

        Args:
            sub_path: Path below ``LIPIDMAPS_BASE_URL``, already URL-safe.
            input_item: Lookup key, echoed in the metadata.
            input_value: Lookup value, echoed in the metadata.

        Returns:
            A success dict with ``data`` and ``metadata``, or an error dict
            when the body is not JSON.

        Raises:
            requests.exceptions.RequestException: On transport failure or a
                non-2xx status; ``run`` converts these into error dicts.
        """
        url = f"{LIPIDMAPS_BASE_URL}/{sub_path}"
        response = requests.get(url, timeout=self.timeout, headers=_REQUEST_HEADERS)
        response.raise_for_status()

        raw_text = response.text.strip()
        if not raw_text or raw_text.lower() == "null" or raw_text in ('""', "{}", "[]"):
            # Known "no match" bodies; an empty body would not parse as JSON.
            records = []
        else:
            try:
                payload = response.json()
            except ValueError:
                return {
                    "status": "error",
                    "error": "LIPID MAPS returned a non-JSON response "
                    f"(first 200 chars: {raw_text[:200]}).",
                }
            records = self._to_record_list(payload)

        return {
            "status": "success",
            "data": records,
            "metadata": {
                "source": "LIPID MAPS Proteome Database (LMPD)",
                "context": self.context,
                "input_item": input_item,
                "input_value": input_value,
                "total_results": len(records),
            },
        }

    @staticmethod
    def _to_record_list(payload: Any) -> list:
        """Flatten a LIPID MAPS payload into a list of record dicts.

        Args:
            payload: The decoded JSON body.

        Returns:
            - For a list: its dict elements, in order.
            - For a dict with keys starting with "Row": the dict-valued rows
              in numeric row order (Row2 before Row10).
            - For any other non-empty dict: that dict as the only record.
            - For an empty dict or any other type: an empty list.
        """
        if isinstance(payload, list):
            return [r for r in payload if isinstance(r, dict)]
        # An empty object carries no record, matching how a literal "{}" body
        # is treated as "no match" by the caller.
        if not isinstance(payload, dict) or not payload:
            return []

        def row_order(key: str) -> tuple:
            # Order numbered rows by their integer suffix; a plain string sort
            # would put "Row10" ahead of "Row2". Rows without a numeric
            # suffix sort after the numbered ones, alphabetically.
            suffix = key[len("Row"):]
            if suffix.isdecimal():
                return (0, int(suffix), key)
            return (1, 0, key)

        row_keys = [k for k in payload if isinstance(k, str) and k.startswith("Row")]
        if not row_keys:
            # Single flat record.
            return [payload]
        row_keys.sort(key=row_order)
        return [payload[k] for k in row_keys if isinstance(payload[k], dict)]