# Source code for tooluniverse.unpaywall_tool
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool
# Contact address sent to Unpaywall as the `email` query parameter when the
# caller supplies none. `UnpaywallTool.run` falls back to it only for the
# "Unpaywall_get_full_text_url" tool; the OA-status tool requires an explicit
# email.
DEFAULT_EMAIL = "tools@tooluniverse.org"
# [docs]
@register_tool("UnpaywallTool")
class UnpaywallTool(BaseTool):
    """
    Query Unpaywall by DOI to check open-access status, OA locations,
    and retrieve full-text URLs.

    The operation performed by :meth:`run` is selected by the ``name`` entry
    of the tool configuration:

    * ``"Unpaywall_get_full_text_url"`` -- return the best PDF / landing-page
      URLs plus a summary of every OA location. ``email`` is optional and
      falls back to ``DEFAULT_EMAIL``.
    * any other name -- return the OA status record. ``email`` is required.

    Results are plain dicts carrying a ``"status"`` key of ``"success"`` or
    ``"error"``; API failures (network, HTTP, malformed body) are reported in
    the returned dict rather than raised.
    """

    # Fields copied verbatim from the Unpaywall record by `_check_oa_status`,
    # in the order they appear in the returned ``data`` dict.
    _OA_STATUS_FIELDS = (
        "is_oa",
        "oa_status",
        "best_oa_location",
        "oa_locations",
        "journal_is_oa",
        "journal_issn_l",
        "journal_issns",
        "doi",
        "title",
        "year",
        "publisher",
        "url",
    )

    def __init__(self, tool_config, base_url="https://api.unpaywall.org/v2/"):
        """
        Create the tool.

        Args:
            tool_config: Tool configuration forwarded to ``BaseTool``.
                ``run`` later reads its ``name`` entry through
                ``self.tool_config`` (presumably stored by
                ``BaseTool.__init__`` -- confirm in the base class).
            base_url: Root URL of the Unpaywall API; the DOI is appended
                directly to it.
        """
        super().__init__(tool_config)
        # Normalise to exactly one trailing slash so the DOI can be appended
        # without producing "//" or a missing separator.
        self.base_url = base_url.rstrip("/") + "/"

    def run(self, arguments):
        """
        Execute the configured Unpaywall operation.

        Args:
            arguments: Mapping with a required ``doi`` entry and an ``email``
                entry (required unless the configured tool name is
                ``"Unpaywall_get_full_text_url"``).

        Returns:
            dict: ``{"status": "success", "data": ..., "metadata": ...}`` on
            success, or ``{"status": "error", "error": ...}`` (optionally
            with ``"reason"``) on failure.
        """
        tool_name = self.tool_config.get("name", "")
        doi = arguments.get("doi")
        email = arguments.get("email")

        # Surrounding whitespace is never part of a DOI; strip it so a
        # padded value still resolves and a blank one counts as missing.
        if isinstance(doi, str):
            doi = doi.strip()
        if not doi:
            return {"status": "error", "error": "`doi` parameter is required."}

        if tool_name == "Unpaywall_get_full_text_url":
            return self._get_full_text_url(doi, email or DEFAULT_EMAIL)

        # Default: Unpaywall_check_oa_status (original behavior)
        if not email:
            return {
                "status": "error",
                "error": "`email` parameter is required for Unpaywall.",
            }
        return self._check_oa_status(doi, email)

    def _call_api(self, doi, email):
        """
        Fetch the Unpaywall record for *doi* (shared API call logic).

        Args:
            doi: DOI to look up; it is percent-encoded before being placed
                in the URL path.
            email: Value sent as the ``email`` query parameter.

        Returns:
            tuple: ``(record, None)`` on success, where ``record`` is the
            decoded JSON object, or ``(None, error_dict)`` on a network
            error, a non-200 response, or a body that is not a JSON object.
        """
        # Percent-encode the DOI so characters such as "#" or "?" are sent as
        # part of the path instead of being read as a fragment / query
        # delimiter. "/" separates the DOI prefix and suffix and stays as is.
        encoded_doi = quote(str(doi), safe="/")
        url = f"{self.base_url}{encoded_doi}"
        params = {"email": email}

        try:
            response = requests.get(url, params=params, timeout=20)
        except requests.RequestException as e:
            return None, {
                "status": "error",
                "error": "Network error calling Unpaywall API",
                "reason": str(e),
            }

        if response.status_code == 404:
            return None, {
                "status": "error",
                "error": f"DOI not found in Unpaywall: {doi}",
            }
        if response.status_code != 200:
            return None, {
                "status": "error",
                "error": f"Unpaywall API error {response.status_code}",
                "reason": response.reason,
            }

        # A 200 response is not guaranteed to carry JSON (e.g. an HTML page
        # from a proxy). JSON decode errors are ValueError subclasses.
        try:
            data = response.json()
        except ValueError as e:
            return None, {
                "status": "error",
                "error": "Unpaywall API returned a non-JSON response",
                "reason": str(e),
            }

        # Callers use ``data.get``; reject any payload that is not an object.
        if not isinstance(data, dict):
            return None, {
                "status": "error",
                "error": "Unexpected response format from Unpaywall API",
            }
        return data, None

    def _check_oa_status(self, doi, email):
        """
        Return the open-access status record for *doi* (original OA check).

        Returns:
            dict: On success, ``data`` holds the fields listed in
            ``_OA_STATUS_FIELDS`` (``None`` for any the record lacks) and
            ``metadata`` holds the source name and the email used. On
            failure, the error dict produced by ``_call_api``.
        """
        data, err = self._call_api(doi, email)
        if err:
            return err
        return {
            "status": "success",
            "data": {field: data.get(field) for field in self._OA_STATUS_FIELDS},
            "metadata": {"source": "Unpaywall", "email": email},
        }

    @staticmethod
    def _extract_oa_location(loc):
        """
        Extract relevant fields from a single OA location dict.

        Args:
            loc: One entry of the record's ``oa_locations`` list.

        Returns:
            dict | None: The URL, host, version, license and ``is_best``
            fields of *loc*, or ``None`` when *loc* is empty or not a dict.
        """
        if not loc or not isinstance(loc, dict):
            return None
        return {
            "url": loc.get("url"),
            "url_for_pdf": loc.get("url_for_pdf"),
            "url_for_landing_page": loc.get("url_for_landing_page"),
            "host_type": loc.get("host_type"),
            "version": loc.get("version"),
            "license": loc.get("license"),
            "is_best": loc.get("is_best", False),
        }

    def _get_full_text_url(self, doi, email):
        """
        Retrieve full-text PDF and landing page URLs for a DOI.

        Returns:
            dict: On success, ``data`` holds the ``best_*`` fields taken from
            the record's ``best_oa_location`` (``None`` when absent), the
            summarised ``all_oa_locations`` list and basic bibliographic
            fields; ``metadata`` holds the number of summarised locations.
            On failure, the error dict produced by ``_call_api``.
        """
        data, err = self._call_api(doi, email)
        if err:
            return err

        # Closed-access records have no best location; fall back to an empty
        # dict so every ``best_*`` field comes out as None.
        best = data.get("best_oa_location")
        if not isinstance(best, dict):
            best = {}

        # Keep only entries that `_extract_oa_location` can summarise.
        all_locations = [
            extracted
            for loc in (data.get("oa_locations") or [])
            if (extracted := self._extract_oa_location(loc))
        ]

        return {
            "status": "success",
            "data": {
                "doi": data.get("doi"),
                "title": data.get("title"),
                "is_oa": data.get("is_oa", False),
                "oa_status": data.get("oa_status"),
                "best_pdf_url": best.get("url_for_pdf"),
                "best_landing_page_url": best.get("url_for_landing_page"),
                "best_oa_url": best.get("url"),
                "best_oa_host_type": best.get("host_type"),
                "best_oa_version": best.get("version"),
                "best_oa_license": best.get("license"),
                "all_oa_locations": all_locations,
                "journal_name": data.get("journal_name"),
                "publisher": data.get("publisher"),
                "year": data.get("year"),
            },
            "metadata": {
                "source": "Unpaywall",
                "api_version": "v2",
                "total_oa_locations": len(all_locations),
            },
        }