# Source code for tooluniverse.fda_gsrs_tool

"""
FDA GSRS Tool

Substance registration and identification tools using the FDA Global Substance
Registration System (GSRS / Substance Registration System) public API:

  - search_substances:  Search for substances by name, UNII, InChIKey, or formula
  - get_substance:      Get full substance record by UNII code or UUID
  - get_structure:      Get structure (SMILES, molfile, formula) for a substance

API base: https://gsrs.ncats.nih.gov/api/v1
No authentication required. Free public FDA/NLM API.

UNII = Unique Ingredient Identifier. Official FDA identifier for drug ingredients.
Cross-references include DrugBank, WHO-ATC, CAS, CFR, EC/EINECS, and more.
"""

from typing import Any, Dict, Optional
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

GSRS_BASE = "https://gsrs.ncats.nih.gov/api/v1"


@register_tool("FDAGSRSTool")
class FDAGSRSTool(BaseTool):
    """
    FDA GSRS substance lookup and search tools.

    Operations:
      - search_substances: Search substances by name, UNII, InChIKey, or formula
      - get_substance:     Retrieve full substance record by UNII or UUID
      - get_structure:     Get structure data (SMILES, formula, InChI) by UNII

    The operation is fixed per instance and read from
    ``tool_config["fields"]["operation"]`` (default ``"search_substances"``).
    Every operation returns a dict whose ``"status"`` key is either
    ``"success"`` or ``"error"``; no operation is expected to raise on bad
    input or on HTTP failures.
    """

    # Number of hits requested when the caller does not pass ``limit``.
    DEFAULT_SEARCH_LIMIT = 10
    # Hard cap on ``limit`` so a single call cannot request a huge page.
    MAX_SEARCH_LIMIT = 50

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Store the request timeout and the operation this instance performs.

        Args:
            tool_config: Tool configuration. Reads the optional ``"timeout"``
                (seconds, default 30) and ``"fields" -> "operation"`` keys.
        """
        super().__init__(tool_config)
        self.timeout = tool_config.get("timeout", 30)
        # ``fields`` may be present but null in a hand-written config.
        fields = tool_config.get("fields") or {}
        self.operation = fields.get("operation", "search_substances")

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Dispatch ``arguments`` to the handler for the configured operation.

        Args:
            arguments: Operation-specific arguments (see the ``_search_substances``,
                ``_get_substance`` and ``_get_structure`` docstrings).

        Returns:
            The handler's result dict, or an error dict when the configured
            operation name is not recognised.
        """
        handlers = {
            "search_substances": self._search_substances,
            "get_substance": self._get_substance,
            "get_structure": self._get_structure,
        }
        handler = handlers.get(self.operation)
        if handler is None:
            return {"status": "error", "error": f"Unknown operation: {self.operation}"}
        return handler(arguments or {})

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------

    def _clean_substance(self, r: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract key fields from a raw substance record.

        Args:
            r: One substance record as returned by the GSRS API.

        Returns:
            A flat summary with ``uuid``, ``unii``, ``name``, ``substanceClass``,
            ``status``, ``formula``, ``smiles``, up to 10 ``synonyms`` and
            ``xrefs`` (code system -> list of codes).
        """
        # ``or []`` / ``or {}``: the keys may be present with a JSON null value.
        xrefs: Dict[str, Any] = {}
        for code_entry in r.get("codes") or []:
            system = code_entry.get("codeSystem", "")
            value = code_entry.get("code", "")
            if system and value:
                xrefs.setdefault(system, []).append(value)

        synonyms = [
            entry.get("name", "")
            for entry in r.get("names") or []
            if entry.get("name")
        ]
        structure = r.get("structure") or {}

        return {
            "uuid": r.get("uuid", ""),
            "unii": r.get("approvalID") or r.get("unii", ""),
            "name": r.get("_name", ""),
            "substanceClass": r.get("substanceClass", ""),
            "status": r.get("status", ""),
            "formula": structure.get("formula", ""),
            "smiles": structure.get("smiles", ""),
            "synonyms": synonyms[:10],
            "xrefs": xrefs,
        }

    def _api_get(
        self, url: str, params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Shared HTTP GET with consistent error handling.

        Args:
            url: Absolute URL to fetch.
            params: Optional query-string parameters.

        Returns:
            ``{"ok": True, "data": <parsed JSON>}`` on success, otherwise
            ``{"ok": False, "error": ..., "retryable": ...}`` plus optional
            diagnostic keys. Never raises.
        """
        try:
            resp = requests.get(url, params=params or {}, timeout=self.timeout)
            resp.raise_for_status()
        except requests.exceptions.Timeout:
            return {"ok": False, "error": "FDA GSRS API timeout", "retryable": True}
        except requests.exceptions.HTTPError as e:
            sc = e.response.status_code
            return {
                "ok": False,
                "error": f"FDA GSRS HTTP {sc}",
                "retryable": sc in (408, 429, 500, 502, 503, 504),
            }
        except Exception as e:
            return {"ok": False, "error": str(e), "retryable": False}

        # JSON decoding gets its own ``try``: ``requests.get`` can itself raise
        # ValueError subclasses (InvalidURL, MissingSchema, bad timeout value)
        # before any response exists, so a shared ``except ValueError`` that
        # reads ``resp`` could fail with UnboundLocalError.
        try:
            return {"ok": True, "data": resp.json()}
        except ValueError:
            ct = resp.headers.get("content-type", "")
            return {
                "ok": False,
                "error": "FDA GSRS returned non-JSON response",
                "content_type": ct,
                "response_snippet": resp.text[:200],
                "retryable": "text/html" in ct or resp.text.lstrip().startswith("<"),
                "suggestion": "FDA GSRS may be under maintenance. Retry in a few minutes.",
            }

    @staticmethod
    def _error_from(result: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a failed ``_api_get`` result into a tool error response."""
        details = {key: value for key, value in result.items() if key != "ok"}
        return {"status": "error", **details}

    @staticmethod
    def _normalize_identifier(raw: Any) -> str:
        """
        Return a URL-path-safe substance identifier, or ``""`` if none was given.

        Identifiers without a hyphen are treated as UNII codes and uppercased.
        Anything containing a hyphen is assumed to be a UUID and keeps the
        caller's casing.
        """
        if not raw:
            return ""
        ident = str(raw).strip()
        # NOTE(review): GSRS UUIDs appear to be lowercase; uppercasing them may
        # break the lookup if the server matches case-sensitively - confirm.
        if "-" not in ident:
            ident = ident.upper()
        # Escape everything (including "/") so the value stays one path segment.
        return quote(ident, safe="")

    # ------------------------------------------------------------------
    # operation: search_substances
    # ------------------------------------------------------------------

    def _search_substances(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Search GSRS substances.

        Args:
            arguments: Reads ``query`` (aliases ``name``, ``drug_name``),
                optional ``substance_class`` and optional ``limit``
                (default 10, capped at 50).

        Returns:
            On success, ``data`` is a list of cleaned substance summaries and
            ``metadata`` carries the query, total hit count, number returned
            and the class filter. Otherwise an error dict.
        """
        query = (
            arguments.get("query")
            or arguments.get("name")
            or arguments.get("drug_name")
        )
        search_term = str(query).strip() if query else ""
        if not search_term:
            return {
                "status": "error",
                "error": "Provide 'query' (name, UNII, InChIKey, or formula).",
            }

        raw_limit = arguments.get("limit")
        if raw_limit is None:
            raw_limit = self.DEFAULT_SEARCH_LIMIT
        try:
            limit = int(raw_limit)
        except (TypeError, ValueError):
            limit = -1
        if limit < 0:
            return {
                "status": "error",
                "error": (
                    "'limit' must be a non-negative integer "
                    f"(maximum {self.MAX_SEARCH_LIMIT})."
                ),
            }
        limit = min(limit, self.MAX_SEARCH_LIMIT)

        substance_class = arguments.get("substance_class", "")
        params: Dict[str, Any] = {"q": search_term, "top": limit}
        if substance_class:
            # NOTE(review): confirm against the GSRS API docs that ``fdim`` is
            # the right parameter for filtering by substance class; it may be a
            # facet-count setting, with filtering done through ``facet`` instead.
            params["fdim"] = f"substanceClass:{substance_class}"

        result = self._api_get(f"{GSRS_BASE}/substances/search", params)
        if not result["ok"]:
            return self._error_from(result)

        payload = result["data"]
        if not isinstance(payload, dict):
            return {
                "status": "error",
                "error": "FDA GSRS returned an unexpected search response format.",
            }

        content = payload.get("content") or []
        total = payload.get("total", len(content))
        substances = [
            self._clean_substance(record)
            for record in content
            if isinstance(record, dict)
        ]
        return {
            "status": "success",
            "data": substances,
            "metadata": {
                "query": query,
                "total": total,
                "returned": len(substances),
                "substance_class_filter": substance_class or None,
            },
        }

    # ------------------------------------------------------------------
    # operation: get_substance
    # ------------------------------------------------------------------

    def _get_substance(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Fetch the full record for one substance.

        Args:
            arguments: Reads ``unii`` (alias ``id``): a UNII code or a UUID.

        Returns:
            On success, ``data`` holds identifiers, a ``structure`` sub-dict,
            up to 20 ``names`` and all ``codes``. Otherwise an error dict.
        """
        unii = arguments.get("unii") or arguments.get("id")
        identifier = self._normalize_identifier(unii)
        if not identifier:
            return {
                "status": "error",
                "error": "Provide 'unii' (e.g., 'R16CO5Y76E' for aspirin).",
            }

        result = self._api_get(f"{GSRS_BASE}/substances/{identifier}")
        if not result["ok"]:
            return self._error_from(result)

        r = result["data"]
        if not isinstance(r, dict) or not r.get("uuid"):
            return {
                "status": "error",
                "error": f"No substance found for UNII: {unii}",
                "suggestion": "Use FDAGSRS_search_substances to find the correct UNII code.",
            }

        # Full record - include all codes and names
        all_codes = [
            {
                "codeSystem": c.get("codeSystem", ""),
                "code": c.get("code", ""),
                "type": c.get("type", ""),
            }
            for c in r.get("codes") or []
            if c.get("codeSystem") and c.get("code")
        ]
        all_names = [
            {
                "name": n.get("name", ""),
                "type": n.get("type", ""),
                "preferred": n.get("preferred", False),
            }
            for n in r.get("names") or []
            if n.get("name")
        ]
        structure = r.get("structure", {}) or {}

        return {
            "status": "success",
            "data": {
                "uuid": r.get("uuid", ""),
                "unii": r.get("approvalID") or r.get("unii", ""),
                "name": r.get("_name", ""),
                "substanceClass": r.get("substanceClass", ""),
                "status": r.get("status", ""),
                "structure": {
                    "smiles": structure.get("smiles", ""),
                    "formula": structure.get("formula", ""),
                    "molfile": structure.get("molfile", ""),
                    "inchiKey": structure.get("inchiKey", ""),
                    "charge": structure.get("charge", ""),
                    "mwt": structure.get("mwt", ""),
                },
                "names": all_names[:20],
                "codes": all_codes,
            },
            "metadata": {"unii": unii},
        }

    # ------------------------------------------------------------------
    # operation: get_structure
    # ------------------------------------------------------------------

    def _get_structure(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Fetch only the structure record for one substance.

        Args:
            arguments: Reads ``unii`` (alias ``id``).

        Returns:
            On success, ``data`` holds the structure fields (SMILES, formula,
            molfile, InChIKey, weight, charge, stereo information). Otherwise
            an error dict, including when the response has no structure ``id``.
        """
        unii = arguments.get("unii") or arguments.get("id")
        identifier = self._normalize_identifier(unii)
        if not identifier:
            return {
                "status": "error",
                "error": "Provide 'unii' (e.g., 'R16CO5Y76E' for aspirin).",
            }

        result = self._api_get(f"{GSRS_BASE}/substances/{identifier}/structure")
        if not result["ok"]:
            return self._error_from(result)

        s = result["data"]
        if not isinstance(s, dict) or not s.get("id"):
            return {
                "status": "error",
                "error": f"No structure found for UNII: {unii}. This may be a non-chemical substance (protein, mixture, etc.).",
            }

        return {
            "status": "success",
            "data": {
                "id": s.get("id", ""),
                "smiles": s.get("smiles", ""),
                "formula": s.get("formula", ""),
                "molfile": s.get("molfile", ""),
                "inchiKey": s.get("inchiKey", ""),
                "mwt": s.get("mwt", ""),
                "charge": s.get("charge", ""),
                "stereoChemistry": s.get("stereoChemistry", ""),
                "opticalActivity": s.get("opticalActivity", ""),
                "atropisomerism": s.get("atropisomerism", ""),
            },
            "metadata": {"unii": unii},
        }