Source code for tooluniverse.bgpt_tool

"""
BGPT structured scientific-evidence tool for ToolUniverse.

BGPT searches scientific papers and returns structured, full-text-derived
evidence fields for each study — methods, sample size/population, results,
limitations and biases, conflicts of interest, data/code availability,
quality scores, study blind spots, and a `how_to_falsify` statement — rather
than only titles and abstracts. This is complementary to the PubMed /
EuropePMC / OpenAlex discovery tools, which return bibliographic metadata.

API: https://bgpt.pro/api/mcp-search (POST, JSON). OpenAPI:
https://raw.githubusercontent.com/connerlambden/bgpt-mcp/main/openapi.yaml
License: MIT. The first 50 results are free (no key); set BGPT_API_KEY for
the paid tier once the free allowance is exhausted. Structured fields are
model-generated, so treat them as an appraisal aid, not curated ground truth.

Requested in mims-harvard/ToolUniverse issue #204.
"""

import os
from typing import Any, Dict

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

# Endpoint that BGPTPaperEvidenceTool.run POSTs its JSON search payload to.
BGPT_SEARCH_URL = "https://bgpt.pro/api/mcp-search"



[docs]
@register_tool("BGPTPaperEvidenceTool")
class BGPTPaperEvidenceTool(BaseTool):
    """
    Search scientific papers via BGPT and return structured study-evidence
    fields for critical appraisal.

    The BGPT_API_KEY environment variable (or an `api_key` argument) is
    optional — it is only needed once the free result allowance is used up.
    """


[docs]
    def __init__(self, tool_config: Dict[str, Any]):
        """
        Set up the tool from its configuration mapping.

        Args:
            tool_config: Tool configuration, forwarded unchanged to the
                base-class initialiser. An optional ``"timeout"`` entry
                overrides the request timeout (in seconds) used by ``run``.
        """
        super().__init__(tool_config)
        # Default to 60s rather than the usual 30s: full-text evidence
        # extraction on the BGPT side is slow.
        configured_timeout = tool_config.get("timeout", 60)
        self.timeout: int = configured_timeout



[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Query BGPT and return structured evidence records for matching papers.

        Args:
            arguments: Tool-call arguments. Recognised keys:

                - ``query`` (alias ``search_keywords``): required, non-blank
                  natural-language search text.
                - ``num_results`` (alias ``limit``): optional integer,
                  default 10, clamped to the range 1-100. ``None`` is
                  treated as "not provided".
                - ``days_back``: optional integer number of days; values
                  below 1 are raised to 1. ``None`` means no restriction
                  is sent.
                - ``api_key``: optional key; falls back to the
                  ``BGPT_API_KEY`` environment variable.

        Returns:
            On success, ``{"status": "success", "data": [...], "metadata":
            {...}}`` where ``data`` is the ``results`` list from the BGPT
            response. On failure, ``{"status": "error", "error": <message>}``,
            with an extra ``detail`` entry (at most 500 characters of the
            response body) when the failure comes from an HTTP response.
            Argument-validation, transport and HTTP failures are reported
            through this dict rather than raised.
        """
        query = arguments.get("query") or arguments.get("search_keywords")
        if not query or not str(query).strip():
            return {
                "status": "error",
                "error": "Parameter 'query' is required (a natural-language scientific search query).",
            }

        # An explicit None is handled like an omitted key (mirroring how
        # 'days_back' is treated below) so callers that forward every
        # optional argument still get the alias / default.
        raw_num_results = arguments.get("num_results")
        if raw_num_results is None:
            raw_num_results = arguments.get("limit")
        if raw_num_results is None:
            raw_num_results = 10

        try:
            # OverflowError covers int(float("inf")), which lenient JSON
            # parsers can produce from the literal `Infinity`.
            num_results = int(raw_num_results)
        except (TypeError, ValueError, OverflowError):
            return {
                "status": "error",
                "error": "Parameter 'num_results' must be an integer (1-100).",
            }
        num_results = max(1, min(num_results, 100))

        payload: Dict[str, Any] = {"query": str(query), "num_results": num_results}

        days_back = arguments.get("days_back")
        if days_back is not None:
            try:
                payload["days_back"] = max(1, int(days_back))
            except (TypeError, ValueError, OverflowError):
                return {
                    "status": "error",
                    "error": "Parameter 'days_back' must be a positive integer (number of days).",
                }

        # api_key is optional: explicit argument first, then BGPT_API_KEY env var.
        api_key = arguments.get("api_key") or os.environ.get("BGPT_API_KEY")
        if api_key:
            payload["api_key"] = api_key

        try:
            response = requests.post(
                BGPT_SEARCH_URL, json=payload, timeout=self.timeout
            )
        except requests.Timeout:
            return {
                "status": "error",
                "error": f"BGPT request timed out after {self.timeout}s. Try a narrower query or fewer num_results.",
            }
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Failed to reach BGPT: {str(e)}"}

        # 402 = free allowance exhausted; the user needs a paid-tier key.
        if response.status_code == 402:
            return {
                "status": "error",
                "error": (
                    "BGPT free result allowance is exhausted. Set the BGPT_API_KEY "
                    "environment variable (or pass 'api_key') to continue. See https://bgpt.pro/mcp/"
                ),
            }
        if response.status_code != 200:
            return {
                "status": "error",
                "error": f"BGPT API returned HTTP {response.status_code}",
                "detail": response.text[:500],
            }

        try:
            body = response.json()
        except ValueError:
            return {
                "status": "error",
                "error": "BGPT returned a non-JSON response.",
                "detail": response.text[:500],
            }

        # A missing or null 'results' entry (or a non-object body) is treated
        # as "no results"; any other non-list value is reported as an error
        # instead of failing later in len() or returning a misleading count.
        results = body.get("results") if isinstance(body, dict) else None
        if results is None:
            results = []
        elif not isinstance(results, list):
            return {
                "status": "error",
                "error": "BGPT returned an unexpected response shape ('results' is not a list).",
                "detail": response.text[:500],
            }

        return {
            "status": "success",
            "data": results,
            "metadata": {
                "source": "BGPT (bgpt.pro)",
                "query": str(query),
                "returned": len(results),
                "evidence_fields_are_model_generated": True,
            },
        }