Source code for tooluniverse.retraction_tool

"""
Publication retraction-status tool for ToolUniverse.

Checks whether a published paper (by DOI) has been retracted, corrected, or
flagged with an expression of concern, using Crossref's ``update`` metadata
(populated from the Retraction Watch database, ``source: retraction-watch``).
This is a research-integrity guardrail for literature workflows: an agent
should confirm a paper is not retracted before citing it or building an
argument on its claims. Complements the discovery tools (PubMed, OpenAlex,
Europe PMC, BGPT), which return papers without retraction status.

API: https://api.crossref.org/works/{doi} (public, no key; a contact email
via CROSSREF_MAILTO is encouraged for Crossref's faster "polite pool").
"""

import os
from typing import Any, Dict, List
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

CROSSREF_WORKS_URL = "https://api.crossref.org/works"

# Crossref update "type" values, grouped by how serious they are.
_RETRACTION_TYPES = {"retraction", "withdrawal", "removal"}
_CONCERN_TYPES = {"expression_of_concern", "concern"}
_CORRECTION_TYPES = {"correction", "erratum", "corrigendum", "addendum"}


def _notice_date(update: Dict[str, Any]) -> str | None:
    parts = (update.get("updated") or {}).get("date-parts") or [[]]
    if parts and parts[0]:
        return "-".join(f"{p:02d}" if i else str(p) for i, p in enumerate(parts[0]))
    return None


@register_tool("RetractionCheckTool")
class RetractionCheckTool(BaseTool):
    """Check a DOI's retraction / correction / expression-of-concern status.

    Looks the DOI up at ``CROSSREF_WORKS_URL`` and classifies the notices
    listed under the work's ``updated-by`` field against the module-level
    type sets.
    """

    # Wrappers a caller may put in front of a bare DOI. Compared
    # case-insensitively and only at the start of the value, so a DOI whose
    # suffix happens to contain one of these strings is left intact.
    _DOI_PREFIXES = (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    )

    def __init__(self, tool_config: Dict[str, Any]):
        """Store the request timeout from ``tool_config``.

        Args:
            tool_config: Tool configuration. The optional ``"timeout"`` key is
                passed to ``requests.get`` as its timeout and defaults to 30.
        """
        super().__init__(tool_config)
        self.timeout: int = tool_config.get("timeout", 30)

    @classmethod
    def _normalize_doi(cls, raw: Any) -> str:
        """Strip whitespace and any leading resolver URL / ``doi:`` prefix."""
        doi = str(raw).strip()
        stripped = True
        # Loop so stacked wrappers ("doi:https://doi.org/10...") are all removed.
        while stripped:
            stripped = False
            for prefix in cls._DOI_PREFIXES:
                if doi[: len(prefix)].lower() == prefix:
                    doi = doi[len(prefix):].strip()
                    stripped = True
                    break
        return doi

    @staticmethod
    def _collect_notices(message: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return the unique notices listed under ``message["updated-by"]``."""
        raw_updates = message.get("updated-by")
        if not isinstance(raw_updates, list):
            return []

        # Deduplicate notices by (type, notice DOI) — Crossref sometimes lists
        # the same retraction twice.
        seen = set()
        notices: List[Dict[str, Any]] = []
        for update in raw_updates:
            if not isinstance(update, dict):
                continue
            # `or ""` keeps an explicit null type from becoming the text "none".
            utype = str(update.get("type") or "").lower()
            key = (utype, update.get("DOI"))
            if key in seen:
                continue
            seen.add(key)
            notices.append(
                {
                    "type": utype,
                    "label": update.get("label"),
                    "notice_doi": update.get("DOI"),
                    "date": _notice_date(update),
                    "source": update.get("source"),
                }
            )
        return notices

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Look up a DOI and report its update notices.

        Args:
            arguments: Must contain ``"doi"``: a bare DOI, a ``doi:``-prefixed
                value, or a ``doi.org`` / ``dx.doi.org`` URL.

        Returns:
            On success, ``{"status": "success", "data": {...}, "metadata":
            {...}}`` where ``data`` holds the normalized ``doi``, the work's
            ``title``, the ``is_retracted`` / ``has_expression_of_concern`` /
            ``has_correction`` flags and the ``notices`` list. On any failure,
            ``{"status": "error", "error": <message>}``; request and parsing
            failures are reported this way rather than raised.
        """
        raw_doi = arguments.get("doi")
        # Validate after normalization: an input that is only a prefix
        # ("doi:", "https://doi.org/") would otherwise query the collection
        # endpoint and be reported as a clean, non-retracted paper.
        doi = self._normalize_doi(raw_doi) if raw_doi else ""
        if not doi:
            return {"status": "error", "error": "Parameter 'doi' is required."}

        params = {}
        mailto = os.environ.get("CROSSREF_MAILTO")
        if mailto:
            params["mailto"] = mailto

        # Escape characters such as '?', '#', '<' and '>' that are legal in a
        # DOI but would otherwise be parsed as URL syntax. '/' separates the
        # DOI prefix from its suffix; '%' is left untouched so a DOI that
        # arrives already percent-encoded is not encoded twice.
        encoded_doi = quote(doi, safe="/%")

        try:
            resp = requests.get(
                f"{CROSSREF_WORKS_URL}/{encoded_doi}",
                params=params,
                headers={"User-Agent": "ToolUniverse/RetractionCheck"},
                timeout=self.timeout,
            )
        except requests.Timeout:
            return {
                "status": "error",
                "error": f"Crossref request timed out after {self.timeout}s.",
            }
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Failed to reach Crossref: {str(e)}"}

        if resp.status_code == 404:
            return {
                "status": "error",
                "error": f"DOI '{doi}' not found in Crossref.",
            }
        if resp.status_code != 200:
            return {
                "status": "error",
                "error": f"Crossref returned HTTP {resp.status_code}",
            }

        try:
            payload = resp.json()
        except ValueError:
            return {
                "status": "error",
                "error": "Crossref returned a non-JSON response.",
            }

        # Without a work record there is nothing to classify; report that
        # instead of silently answering "not retracted".
        message = payload.get("message") if isinstance(payload, dict) else None
        if not isinstance(message, dict):
            return {
                "status": "error",
                "error": "Crossref returned an unexpected response format.",
            }

        notices = self._collect_notices(message)
        types = {n["type"] for n in notices}
        title = message.get("title") or [None]

        return {
            "status": "success",
            "data": {
                "doi": doi,
                "title": title[0] if isinstance(title, list) else title,
                "is_retracted": bool(types & _RETRACTION_TYPES),
                "has_expression_of_concern": bool(types & _CONCERN_TYPES),
                "has_correction": bool(types & _CORRECTION_TYPES),
                "notices": notices,
            },
            "metadata": {
                "source": "Crossref update metadata (Retraction Watch)",
                "notice_count": len(notices),
            },
        }