Source code for tooluniverse.biorxiv_tool
import requests
from .base_tool import BaseTool
from .http_utils import request_with_retry
from .tool_registry import register_tool
[docs]
@register_tool("BioRxivTool")
class BioRxivTool(BaseTool):
    """
    Get bioRxiv or medRxiv preprint metadata by DOI.

    This tool retrieves full metadata for a specific preprint using the
    bioRxiv API. For searching preprints by keywords, use
    EuropePMC_search_articles with 'SRC:PPR' filter instead.

    Arguments:
        doi (str): bioRxiv or medRxiv DOI. Accepted forms are a full DOI
            ('10.1101/2023.12.01.569554'), the bare suffix
            ('2023.12.01.569554'), or either of those behind a
            'https://doi.org/' / 'doi:' style prefix.
        server (str): Server name - 'biorxiv' or 'medrxiv'
            (default: 'biorxiv'). Matching ignores case and surrounding
            whitespace.
    """

    # Accepted server identifiers mapped to the display name used in messages.
    _SERVER_LABELS = {"biorxiv": "bioRxiv", "medrxiv": "medRxiv"}

    # Registrant prefix prepended when only the DOI suffix is supplied.
    _DEFAULT_DOI_PREFIX = "10.1101/"

    # Resolver/scheme prefixes (compared in lower case) removed from the DOI.
    _DOI_LINK_PREFIXES = (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    )

    def __init__(
        self,
        tool_config,
        base_url="https://api.biorxiv.org/details",
    ):
        """
        Store the API base URL and create a JSON-accepting HTTP session.

        Args:
            tool_config: Tool configuration, forwarded to ``BaseTool``.
            base_url (str): Root of the details endpoint.
        """
        super().__init__(tool_config)
        self.base_url = base_url
        self.session = requests.Session()
        self.session.headers.update({"Accept": "application/json"})

    @staticmethod
    def _error_response(message, **extra):
        """Build the error payload; ``extra`` keys are placed before ``data``."""
        return {"status": "error", "error": message, **extra, "data": None}

    @classmethod
    def _normalize_doi(cls, raw_doi):
        """Return a full DOI string, or ``''`` when no usable DOI was given."""
        if not raw_doi:
            return ""
        doi = str(raw_doi).strip()

        # Drop a leading resolver URL or "doi:" scheme so pasted links work.
        lowered = doi.lower()
        for prefix in cls._DOI_LINK_PREFIXES:
            if lowered.startswith(prefix):
                doi = doi[len(prefix):].strip()
                break

        # A value shaped like "10.<digits>/<suffix>" is already a full DOI;
        # it is passed through so it is never double-prefixed.
        registrant, slash, suffix = doi.partition("/")
        if slash and registrant.startswith("10.") and registrant[3:].isdigit():
            return doi if suffix else ""

        # Otherwise treat the value as a bare suffix like "2023.12.01.569554".
        return f"{cls._DEFAULT_DOI_PREFIX}{doi}" if doi else ""

    @staticmethod
    def _parse_authors(raw_authors):
        """Split a ';'-separated author string into a list of trimmed names."""
        if not isinstance(raw_authors, str) or not raw_authors:
            return []
        return [name.strip() for name in raw_authors.split(";") if name.strip()]

    @classmethod
    def _build_result(cls, item, server):
        """Map one API collection entry onto the tool's result dictionary."""
        doi_val = item.get("doi")
        content_url = (
            f"https://www.{server}.org/content/{doi_val}" if doi_val else None
        )
        return {
            "doi": doi_val,
            "title": item.get("title"),
            "authors": cls._parse_authors(item.get("authors", "")),
            "author_corresponding": item.get("author_corresponding"),
            "author_corresponding_institution": item.get(
                "author_corresponding_institution"
            ),
            "abstract": item.get("abstract"),
            "date": item.get("date"),
            "version": item.get("version"),
            "type": item.get("type"),
            "license": item.get("license"),
            "category": item.get("category"),
            # Falsy values (e.g. an empty string) are reported as None.
            "published": item.get("published") or None,
            "url": content_url,
            "pdf_url": f"{content_url}.full.pdf" if content_url else None,
            "xml_url": item.get("jatsxml"),
            "server": server,
        }

    def run(self, arguments=None):
        """
        Fetch metadata for one preprint.

        Args:
            arguments (dict | None): May contain ``doi`` (required) and
                ``server`` (optional, defaults to 'biorxiv').

        Returns:
            dict: ``{"status": "success", "data": {...}}`` on success, or
            ``{"status": "error", "error": <message>, "data": None}`` on
            failure. A non-200, non-404 HTTP status additionally carries a
            ``"reason"`` key. Exceptions are converted to error payloads
            rather than raised.
        """
        arguments = arguments or {}

        # An explicit None for the optional argument means "use the default".
        raw_server = arguments.get("server")
        if raw_server is None:
            server = "biorxiv"
        else:
            server = str(raw_server).strip().lower()

        # Normalizing first means a whitespace-only DOI is rejected here
        # instead of producing a request for an empty identifier.
        doi = self._normalize_doi(arguments.get("doi"))
        if not doi:
            return self._error_response(
                "`doi` parameter is required. Provide a bioRxiv DOI like '10.1101/2023.12.01.569554' or '2023.12.01.569554'."
            )

        if server not in self._SERVER_LABELS:
            return self._error_response(
                f"Invalid server '{raw_server}'. Must be 'biorxiv' or 'medrxiv'."
            )

        # API format: /details/{server}/{doi}/na/json
        url = f"{self.base_url.rstrip('/')}/{server}/{doi}/na/json"

        try:
            resp = request_with_retry(
                self.session, "GET", url, timeout=10, max_attempts=2
            )

            if resp.status_code == 404:
                return self._error_response(
                    f"Preprint not found with DOI: {doi}. Check the DOI is correct and the paper exists on {server}."
                )
            if resp.status_code != 200:
                return self._error_response(
                    f"{server} API returned status {resp.status_code}",
                    reason=resp.reason,
                )

            # Decode errors are caught right here: in recent `requests`
            # releases the JSON decode error is also a RequestException, so
            # the outer handler would otherwise report it as a network error.
            try:
                data = resp.json()
            except ValueError:
                return self._error_response(
                    f"{server} API returned invalid JSON response"
                )

            collection = data.get("collection") if isinstance(data, dict) else None
            if not collection:
                return self._error_response(
                    f"No data returned from {self._SERVER_LABELS[server]} API"
                )

            # NOTE(review): the first entry is used, as before. Confirm
            # whether the API can return several entries (e.g. one per
            # posted version) and whether the latest would be preferable.
            return {
                "status": "success",
                "data": self._build_result(collection[0], server),
            }
        except requests.RequestException as e:
            return self._error_response(
                f"Network error retrieving preprint: {str(e)}"
            )
        except Exception as e:
            # Tool boundary: report unexpected failures instead of raising.
            return self._error_response(f"Failed to retrieve preprint: {str(e)}")