Source code for tooluniverse.norine_tool

"""Norine non-ribosomal peptide (NRP) lookup tool (live REST, keyless).

Norine (https://bioinfo.cristal.univ-lille.fr/norine/, Bonsai Bioinformatics,
Universite de Lille) is the reference knowledgebase for non-ribosomal peptides
(NRPs): bacterial/fungal secondary-metabolite peptides assembled by NRP
synthetases rather than the ribosome. These molecules (e.g. tyrocidine,
microcystin, surfactin) frequently contain non-proteinogenic monomers
(D-amino acids, Orn, ornithine, fatty-acid tails) and are cyclic or branched,
so they are NOT covered by ribosomal / therapeutic / MHC peptide resources.

``NorineGetPeptideTool`` (Norine_get_peptide) looks a record up either by
peptide name (GET /norine/rest/name/json/{name}) or by Norine ID
(GET /norine/rest/id/json/{id}, zero-padded to 5 digits). The two routes wrap
their results differently (name route: top-level ``peptides`` list; id route:
``norine.peptide`` list); this tool normalizes both into a single ``peptides``
list and surfaces the count plus first-record general/structure fields.

The public REST API is keyless. The explicit ``/json/`` path segment is
required to get ``application/json`` (the default routes return HTML).
"""

from typing import Any, Dict, List, Optional

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

_BASE_URL = "https://bioinfo.cristal.univ-lille.fr/norine/rest"
_TIMEOUT = 30
_HEADERS = {"Accept": "application/json"}


def _err(message: str, **extra: Any) -> Dict[str, Any]:
    out: Dict[str, Any] = {"status": "error", "error": message}
    out.update(extra)
    return out


def _normalize_id(raw: Any) -> Optional[str]:
    """Coerce a Norine id to a zero-padded 5-digit string.

    Accepts 123, '123', '00123', 'NOR00123' -> '00123'. Returns None if no
    digits are present.
    """
    digits = "".join(ch for ch in str(raw) if ch.isdigit())
    if not digits:
        return None
    # Strip leading zeros then re-pad so 'NOR00123' and 123 both -> '00123'.
    digits = digits.lstrip("0") or "0"
    return digits.zfill(5)


def _extract_peptides(payload: Any) -> Optional[List[Any]]:
    """Normalize name-route ('peptides') vs id-route ('norine.peptide')
    wrapping into one flat list. Returns None if the shape is unrecognized.
    """
    if not isinstance(payload, dict):
        return None
    if isinstance(payload.get("peptides"), list):
        return payload["peptides"]
    norine = payload.get("norine")
    if isinstance(norine, dict) and isinstance(norine.get("peptide"), list):
        return norine["peptide"]
    return None


def _first_record_summary(peptides: List[Any]) -> Dict[str, Any]:
    """Surface general/structure fields of the first record for metadata."""
    if not peptides or not isinstance(peptides[0], dict):
        return {}
    first = peptides[0]
    general = first.get("general")
    structure = first.get("structure")
    if not isinstance(general, dict):
        general = {}
    if not isinstance(structure, dict):
        structure = {}
    return {
        "id": general.get("id"),
        "name": general.get("name"),
        "family": general.get("family"),
        "category": general.get("category"),
        "formula": general.get("formula"),
        "mw": general.get("mw"),
        "activity": general.get("activity"),
        "structure_type": structure.get("type"),
        "structure_size": structure.get("size"),
        "composition": structure.get("composition"),
    }


[docs] @register_tool( "NorineGetPeptideTool", config={ "name": "Norine_get_peptide", "type": "NorineGetPeptideTool", "description": ( "Programmatic keyless lookup of non-ribosomal peptide (NRP) records " "from Norine (Nonribosomal Peptides Database, Universite de Lille) " "by peptide name or Norine ID. Returns the curated structure " "(monomer composition, cyclic/linear type, monomer graph), " "molecular formula/weight, SMILES, biological activity, source " "organism (taxId), and literature references (PMIDs). NRPs are " "bacterial/fungal peptides made by NRP synthetases (not the " "ribosome) and often contain non-proteinogenic monomers and " "cyclic/branched backbones (e.g. tyrocidine, microcystin, " "surfactin), so they are NOT covered by ribosomal/therapeutic/MHC " "peptide resources. Provide 'name' (e.g. 'tyrocidine' -> 4 records) " "OR 'norine_id' (e.g. '00123', zero-padded to 5 digits). The name " "route may return several peptides of a family; the id route " "returns one. Keyless Norine REST API." ), "parameter": { "type": "object", "properties": { "name": { "type": ["string", "null"], "description": ( "Peptide (or family) name to look up. Example: " "'tyrocidine' returns 4 records (tyrocidine A-D). " "Case-insensitive partial-family matching is handled by " "Norine. Provide either 'name' or 'norine_id', not both." ), }, "norine_id": { "type": ["string", "integer", "null"], "description": ( "Norine peptide ID. Accepts a bare number, a " "zero-padded 5-digit string, or a 'NOR00123'-style ID " "(digits are extracted and zero-padded to 5 digits). " "Example: '00123' (microcystin family, category PK-NRP, " "organism Anabaena, taxId 1163). Provide either 'name' " "or 'norine_id', not both." ), }, }, "required": [], }, "return_schema": { "oneOf": [ { "type": "object", "description": "Successful Norine peptide lookup.", "properties": { "status": {"type": "string", "enum": ["success"]}, "data": { "type": "object", "properties": { "peptides": { "type": "array", "items": { "type": "object", "properties": { "cite": {"type": "array"}, "general": { "type": "object", "properties": { "id": {"type": "string"}, "name": {"type": "string"}, "family": {"type": "string"}, "syno": {"type": "array"}, "category": {"type": "string"}, "formula": {"type": "string"}, "mw": { "type": ["string", "number"] }, "comment": {"type": "string"}, "status": {"type": "string"}, "activity": {"type": "array"}, "source": {"type": "string"}, "doi": {"type": "string"}, }, }, "structure": { "type": "object", "properties": { "type": {"type": "string"}, "size": { "type": ["integer", "string"] }, "composition": {"type": "string"}, "graph": {"type": "string"}, "smiles": {"type": "string"}, }, }, "organism": {"type": "array"}, "reference": {"type": "array"}, }, }, "description": ( "Normalized list of Norine NRP records " "(name route 'peptides' and id route " "'norine.peptide' merged here)." ), } }, "required": ["peptides"], }, "metadata": { "type": "object", "properties": { "source": {"type": "string"}, "url": {"type": "string"}, "lookup_mode": {"type": "string"}, "query": {"type": "string"}, "count": {"type": "integer"}, "first_record": {"type": "object"}, }, }, }, "required": ["status", "data", "metadata"], }, { "type": "object", "description": "Error result.", "properties": { "status": {"type": "string", "enum": ["error"]}, "error": {"type": "string"}, "url": {"type": "string"}, "response_snippet": {"type": "string"}, }, "required": ["status", "error"], }, ] }, "test_examples": [ {"name": "tyrocidine"}, {"norine_id": "00123"}, ], "label": [ "Norine", "Non-ribosomal Peptide", "NRP", "Cyclic Peptide", "Peptide", ], "metadata": { "tags": [ "Norine", "non-ribosomal peptide", "NRP", "cyclic peptide", "monomer", "SMILES", "peptide", ], "estimated_execution_time": "1-10 seconds", }, }, ) class NorineGetPeptideTool(BaseTool): """Look up a Norine NRP record by peptide name or Norine ID."""
[docs] def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]: arguments = arguments or {} name = arguments.get("name") if isinstance(name, str): name = name.strip() or None raw_id = arguments.get("norine_id") if isinstance(raw_id, str): raw_id = raw_id.strip() or None has_name = name is not None has_id = raw_id is not None if has_name and has_id: return _err("Provide either 'name' or 'norine_id', not both.") if not has_name and not has_id: return _err("One of 'name' or 'norine_id' is required.") if has_id: norine_id = _normalize_id(raw_id) if norine_id is None: return _err(f"Invalid norine_id: {raw_id!r} (no numeric ID found).") lookup_mode = "id" query = norine_id url = f"{_BASE_URL}/id/json/{norine_id}" else: lookup_mode = "name" query = name url = f"{_BASE_URL}/name/json/{requests.utils.quote(name, safe='')}" try: resp = requests.get(url, headers=_HEADERS, timeout=_TIMEOUT) except requests.exceptions.RequestException as exc: return _err(f"Request to Norine failed: {exc}", url=url) if resp.status_code == 404: return _err(f"No Norine record found ({lookup_mode}={query}).", url=url) if resp.status_code != 200: return _err( f"Norine returned HTTP {resp.status_code}", url=url, response_snippet=(resp.text or "")[:200], ) try: payload = resp.json() except ValueError: return _err( "Norine returned a non-JSON response (ensure the /json/ route " "is used; default routes return HTML).", url=url, response_snippet=(resp.text or "")[:200], ) peptides = _extract_peptides(payload) if peptides is None: return _err( "Unexpected Norine response shape (no 'peptides' or " "'norine.peptide' list found).", url=url, response_snippet=(resp.text or "")[:200], ) # Both routes return HTTP 200 with an empty list when nothing matches. if not peptides: return _err(f"No Norine peptide found ({lookup_mode}={query}).", url=url) return { "status": "success", "data": {"peptides": peptides}, "metadata": { "source": "Norine (Nonribosomal Peptides Database, " "Universite de Lille)", "url": url, "lookup_mode": lookup_mode, "query": str(query), "count": len(peptides), "first_record": _first_record_summary(peptides), }, }