Source code for tooluniverse.biorxiv_tool

import requests
from .base_tool import BaseTool
from .http_utils import request_with_retry
from .tool_registry import register_tool


@register_tool("BioRxivTool")
class BioRxivTool(BaseTool):
    """
    Get bioRxiv or medRxiv preprint metadata by DOI.

    This tool retrieves full metadata for a specific preprint using the
    bioRxiv API. For searching preprints by keywords, use
    EuropePMC_search_articles with 'SRC:PPR' filter instead.

    Arguments:
        doi (str): bioRxiv or medRxiv DOI (e.g., '10.1101/2023.12.01.569554'
            or '2023.12.01.569554'). A leading 'doi:' or 'https://doi.org/'
            style prefix is tolerated and stripped.
        server (str): Server name - 'biorxiv' or 'medrxiv'
            (default: 'biorxiv')
    """

    # Servers accepted in the `server` argument.
    _SERVERS = ("biorxiv", "medrxiv")

    # DOI registrant prefix prepended to partial DOIs.
    _DOI_REGISTRANT = "10.1101/"

    # Resolver/scheme prefixes that are removed before the registrant check,
    # compared case-insensitively.
    _DOI_PREFIXES = (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    )

    def __init__(
        self,
        tool_config,
        base_url="https://api.biorxiv.org/details",
    ):
        """
        Set up the tool and a reusable HTTP session.

        Arguments:
            tool_config: Configuration object forwarded unchanged to
                ``BaseTool.__init__``.
            base_url (str): Root of the details endpoint; ``run`` appends
                ``/{server}/{doi}/na/json`` to it.
        """
        super().__init__(tool_config)
        self.base_url = base_url
        self.session = requests.Session()
        self.session.headers.update({"Accept": "application/json"})

    @classmethod
    def _normalize_doi(cls, doi):
        """Return *doi* as a full '10.1101/...' string, or '' if it is empty."""
        text = str(doi).strip() if doi else ""

        # Drop a resolver/scheme prefix so 'https://doi.org/10.1101/x' and
        # 'doi:10.1101/x' do not end up with the registrant prepended twice.
        lowered = text.lower()
        for prefix in cls._DOI_PREFIXES:
            if lowered.startswith(prefix):
                text = text[len(prefix):].strip()
                break

        # Allow partial DOIs like "2023.12.01.569554".
        if text and not text.startswith(cls._DOI_REGISTRANT):
            text = f"{cls._DOI_REGISTRANT}{text}"
        return text

    @staticmethod
    def _build_result(item, server):
        """Map one entry of the API's ``collection`` list to the result dict."""
        # The API delivers authors as a single ';'-separated string.
        authors_str = item.get("authors", "")
        if isinstance(authors_str, str) and authors_str:
            authors = [a.strip() for a in authors_str.split(";") if a.strip()]
        else:
            authors = []

        doi_val = item.get("doi")
        content_url = (
            f"https://www.{server}.org/content/{doi_val}" if doi_val else None
        )
        return {
            "doi": doi_val,
            "title": item.get("title"),
            "authors": authors,
            "author_corresponding": item.get("author_corresponding"),
            "author_corresponding_institution": item.get(
                "author_corresponding_institution"
            ),
            "abstract": item.get("abstract"),
            "date": item.get("date"),
            "version": item.get("version"),
            "type": item.get("type"),
            "license": item.get("license"),
            "category": item.get("category"),
            # Falsy values (e.g. an empty string) are reported as None.
            "published": item.get("published") or None,
            "url": content_url,
            "pdf_url": f"{content_url}.full.pdf" if content_url else None,
            "xml_url": item.get("jatsxml"),
            "server": server,
        }

    def run(self, arguments=None):
        """
        Fetch metadata for one preprint.

        Arguments:
            arguments (dict | None): Mapping with the keys ``doi`` (required)
                and ``server`` (optional, 'biorxiv' or 'medrxiv').

        Returns:
            dict: ``{"status": "success", "data": {...}}`` on success, or
            ``{"status": "error", "error": <message>, "data": None}`` on
            failure (non-200/404 responses additionally carry ``"reason"``).
            Failures are reported through the returned dict; exceptions
            raised while fetching or parsing are caught here.
        """
        arguments = arguments or {}
        # Normalise before the emptiness check so a whitespace-only DOI is
        # rejected instead of producing a request for the bare registrant.
        doi = self._normalize_doi(arguments.get("doi"))
        # `or` also covers an explicitly passed None, not just a missing key.
        server = arguments.get("server") or "biorxiv"

        if not doi:
            return {
                "status": "error",
                "error": "`doi` parameter is required. Provide a bioRxiv DOI like '10.1101/2023.12.01.569554' or '2023.12.01.569554'.",
                "data": None,
            }

        if server not in self._SERVERS:
            return {
                "status": "error",
                "error": f"Invalid server '{server}'. Must be 'biorxiv' or 'medrxiv'.",
                "data": None,
            }

        # API format: /details/{server}/{doi}/na/json
        url = f"{self.base_url}/{server}/{doi}/na/json"

        try:
            resp = request_with_retry(
                self.session, "GET", url, timeout=10, max_attempts=2
            )

            if resp.status_code == 404:
                return {
                    "status": "error",
                    "error": f"Preprint not found with DOI: {doi}. Check the DOI is correct and the paper exists on {server}.",
                    "data": None,
                }

            if resp.status_code != 200:
                return {
                    "status": "error",
                    "error": f"{server} API returned status {resp.status_code}",
                    "reason": resp.reason,
                    "data": None,
                }

            # Decode in its own try: in requests >= 2.27 the error raised by
            # Response.json() is both a ValueError and a RequestException, so
            # the RequestException handler below would otherwise report a
            # malformed body as a network error.
            try:
                data = resp.json()
            except ValueError:
                return {
                    "status": "error",
                    "error": f"{server} API returned invalid JSON response",
                    "data": None,
                }

            collection = data.get("collection", [])
            if not collection:
                return {
                    "status": "error",
                    "error": "No data returned from bioRxiv API",
                    "data": None,
                }

            # Use the first entry of the collection.
            # NOTE(review): the API may return one entry per preprint version;
            # confirm whether the first or the latest entry is wanted here.
            return {
                "status": "success",
                "data": self._build_result(collection[0], server),
            }

        except requests.RequestException as e:
            return {
                "status": "error",
                "error": f"Network error retrieving preprint: {str(e)}",
                "data": None,
            }
        except Exception as e:
            # Last-resort boundary: a tool call reports failure in its result
            # dict rather than raising (e.g. unexpected payload shape).
            return {
                "status": "error",
                "error": f"Failed to retrieve preprint: {str(e)}",
                "data": None,
            }