Source code for tooluniverse.glygen_tool

# glygen_tool.py
"""
GlyGen REST API tool for ToolUniverse.

GlyGen is a data integration and dissemination project for carbohydrate
and glycoconjugate related data. It integrates glycan structures, glycoproteins,
glycosylation sites, species, enzymes, and publications.

API: https://api.glygen.org
No authentication required. Free for academic/research use.
"""

import requests
from typing import Dict, Any
from .base_tool import BaseTool
from .tool_registry import register_tool

GLYGEN_BASE_URL = "https://api.glygen.org"


[docs] @register_tool("GlyGenTool") class GlyGenTool(BaseTool): """ Tool for querying the GlyGen glycoinformatics database. GlyGen provides access to glycan structures, glycoproteins, glycosylation sites, biosynthetic enzymes, and related publications from integrated sources including GlyTouCan, GlyConnect, UniProt, and PubMed. No authentication required. """
[docs] def __init__(self, tool_config: Dict[str, Any]): super().__init__(tool_config) self.timeout = tool_config.get("timeout", 60) self.endpoint_type = tool_config.get("fields", {}).get( "endpoint_type", "glycan" ) self.query_mode = tool_config.get("fields", {}).get("query_mode", "detail")
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Execute the GlyGen API call.""" try: return self._dispatch(arguments) except requests.exceptions.Timeout: return { "error": f"GlyGen API request timed out after {self.timeout} seconds" } except requests.exceptions.ConnectionError: return { "error": "Failed to connect to GlyGen API. Check network connectivity." } except requests.exceptions.HTTPError as e: return {"error": f"GlyGen API HTTP error: {e.response.status_code}"} except Exception as e: return {"error": f"Unexpected error querying GlyGen: {str(e)}"}
[docs] def _dispatch(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Route to appropriate endpoint based on config.""" if self.endpoint_type == "glycan" and self.query_mode == "detail": return self._glycan_detail(arguments) elif self.endpoint_type == "glycan" and self.query_mode == "search": return self._glycan_search(arguments) elif self.endpoint_type == "protein" and self.query_mode == "detail": return self._protein_detail(arguments) elif self.endpoint_type == "protein" and self.query_mode == "search": return self._protein_search(arguments) elif self.endpoint_type == "site" and self.query_mode == "detail": return self._site_detail(arguments) else: return { "error": f"Unknown endpoint_type/query_mode: {self.endpoint_type}/{self.query_mode}" }
[docs] def _glycan_detail(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get detailed information about a specific glycan by GlyTouCan accession.""" glytoucan_ac = arguments.get("glytoucan_ac", "") if not glytoucan_ac: return {"error": "glytoucan_ac parameter is required"} url = f"{GLYGEN_BASE_URL}/glycan/detail/{glytoucan_ac}/" response = requests.post( url, json={}, timeout=self.timeout, headers={"Content-Type": "application/json"}, ) response.raise_for_status() raw = response.json() # Extract key fields for structured response result = { "glytoucan_ac": raw.get("glytoucan", {}).get("glytoucan_ac"), "mass": raw.get("mass"), "number_monosaccharides": raw.get("number_monosaccharides"), "glycan_type": raw.get("glycan_type"), "iupac": raw.get("iupac"), "wurcs": raw.get("wurcs"), "classification": raw.get("classification", []), "species": [ { "name": s.get("name"), "taxid": s.get("taxid"), "common_name": s.get("common_name"), } for s in raw.get("species", [])[:20] ], "glycoprotein_count": len(raw.get("glycoprotein", [])), "glycoproteins": [ { "uniprot_ac": gp.get("uniprot_canonical_ac"), "protein_name": gp.get("protein_name"), } for gp in raw.get("glycoprotein", [])[:10] ], "publication_count": len(raw.get("publication", [])), "composition": raw.get("composition", []), "byonic": raw.get("byonic"), } return { "data": result, "metadata": { "source": "GlyGen", "query": glytoucan_ac, "endpoint": "glycan/detail", }, }
[docs] def _protein_detail(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get glycoprotein details including glycosylation sites.""" uniprot_ac = arguments.get("uniprot_ac", "") if not uniprot_ac: return {"error": "uniprot_ac parameter is required"} url = f"{GLYGEN_BASE_URL}/protein/detail/{uniprot_ac}/" response = requests.post( url, json={}, timeout=self.timeout, headers={"Content-Type": "application/json"}, ) response.raise_for_status() raw = response.json() # Extract key fields protein_names = raw.get("protein_names", []) gene_names = raw.get("gene_names", []) glycosylation = raw.get("glycosylation", []) species = raw.get("species", []) result = { "protein_name": protein_names[0].get("name") if protein_names else None, "gene_names": [g.get("name") for g in gene_names[:5]], "species": species[0].get("name") if species else None, "mass": raw.get("mass"), "sequence_length": len(raw.get("sequence", {}).get("sequence", "")), "glycosylation_count": len(glycosylation), "glycosylation_sites": [ { "position": g.get("start_pos"), "residue": g.get("residue"), "type": g.get("type"), "glytoucan_ac": g.get("glytoucan_ac"), "site_category": g.get("site_category"), } for g in glycosylation[:30] ], "function": raw.get("function", [{}])[0].get("annotation") if raw.get("function") else None, "disease_count": len(raw.get("disease", [])), "snv_count": len(raw.get("snv", [])), "pathway_count": len(raw.get("pathway", [])), "publication_count": len(raw.get("publication", [])), } return { "data": result, "metadata": { "source": "GlyGen", "query": uniprot_ac, "endpoint": "protein/detail", }, }
[docs] def _site_detail(self, arguments: Dict[str, Any]) -> Dict[str, Any]: """Get details about a specific glycosylation site.""" site_id = arguments.get("site_id", "") if not site_id: return { "error": "site_id parameter is required (format: UniProtAC-isoform.start.end, e.g. P02724-1.52.52)" } url = f"{GLYGEN_BASE_URL}/site/detail/{site_id}/" response = requests.post( url, json={}, timeout=self.timeout, headers={"Content-Type": "application/json"}, ) response.raise_for_status() raw = response.json() result = { "site_id": raw.get("id"), "uniprot_ac": raw.get("uniprot_canonical_ac"), "start_pos": raw.get("start_pos"), "end_pos": raw.get("end_pos"), "site_seq": raw.get("site_seq"), "start_aa": raw.get("start_aa"), "upstream_seq": raw.get("up_seq"), "downstream_seq": raw.get("down_seq"), "glycosylation": raw.get("glycosylation", []), "phosphorylation": raw.get("phosphorylation", []), "snv": raw.get("snv", []), "mutagenesis": raw.get("mutagenesis", []), "species": raw.get("species"), "all_sites_count": len(raw.get("all_sites", [])), "categories": raw.get("categories", []), } return { "data": result, "metadata": { "source": "GlyGen", "query": site_id, "endpoint": "site/detail", }, }