tooluniverse.unpaywall_tool 源代码

import requests
from .base_tool import BaseTool
from .tool_registry import register_tool

DEFAULT_EMAIL = "tools@tooluniverse.org"


[文档] @register_tool("UnpaywallTool") class UnpaywallTool(BaseTool): """ Query Unpaywall by DOI to check open-access status, OA locations, and retrieve full-text URLs. """
[文档] def __init__(self, tool_config, base_url="https://api.unpaywall.org/v2/"): super().__init__(tool_config) self.base_url = base_url.rstrip("/") + "/"
[文档] def run(self, arguments): tool_name = self.tool_config.get("name", "") doi = arguments.get("doi") email = arguments.get("email") if not doi: return {"status": "error", "error": "`doi` parameter is required."} if tool_name == "Unpaywall_get_full_text_url": return self._get_full_text_url(doi, email or DEFAULT_EMAIL) # Default: Unpaywall_check_oa_status (original behavior) if not email: return { "status": "error", "error": "`email` parameter is required for Unpaywall.", } return self._check_oa_status(doi, email)
[文档] def _call_api(self, doi, email): """Shared API call logic.""" url = f"{self.base_url}{doi}" params = {"email": email} try: response = requests.get(url, params=params, timeout=20) except requests.RequestException as e: return None, { "status": "error", "error": "Network error calling Unpaywall API", "reason": str(e), } if response.status_code == 404: return None, { "status": "error", "error": f"DOI not found in Unpaywall: {doi}", } if response.status_code != 200: return None, { "status": "error", "error": f"Unpaywall API error {response.status_code}", "reason": response.reason, } return response.json(), None
[文档] def _check_oa_status(self, doi, email): """Original OA status check.""" data, err = self._call_api(doi, email) if err: return err return { "status": "success", "data": { "is_oa": data.get("is_oa"), "oa_status": data.get("oa_status"), "best_oa_location": data.get("best_oa_location"), "oa_locations": data.get("oa_locations"), "journal_is_oa": data.get("journal_is_oa"), "journal_issn_l": data.get("journal_issn_l"), "journal_issns": data.get("journal_issns"), "doi": data.get("doi"), "title": data.get("title"), "year": data.get("year"), "publisher": data.get("publisher"), "url": data.get("url"), }, "metadata": {"source": "Unpaywall", "email": email}, }
[文档] @staticmethod def _extract_oa_location(loc): """Extract relevant fields from a single OA location dict.""" if not loc or not isinstance(loc, dict): return None return { "url": loc.get("url"), "url_for_pdf": loc.get("url_for_pdf"), "url_for_landing_page": loc.get("url_for_landing_page"), "host_type": loc.get("host_type"), "version": loc.get("version"), "license": loc.get("license"), "is_best": loc.get("is_best", False), }
[文档] def _get_full_text_url(self, doi, email): """Retrieve full-text PDF and landing page URLs for a DOI.""" data, err = self._call_api(doi, email) if err: return err best = data.get("best_oa_location") or {} all_locations = [ extracted for loc in (data.get("oa_locations") or []) if (extracted := self._extract_oa_location(loc)) ] return { "status": "success", "data": { "doi": data.get("doi"), "title": data.get("title"), "is_oa": data.get("is_oa", False), "oa_status": data.get("oa_status"), "best_pdf_url": best.get("url_for_pdf"), "best_landing_page_url": best.get("url_for_landing_page"), "best_oa_url": best.get("url"), "best_oa_host_type": best.get("host_type"), "best_oa_version": best.get("version"), "best_oa_license": best.get("license"), "all_oa_locations": all_locations, "journal_name": data.get("journal_name"), "publisher": data.get("publisher"), "year": data.get("year"), }, "metadata": { "source": "Unpaywall", "api_version": "v2", "total_oa_locations": len(all_locations), }, }