Source code for tooluniverse.bgpt_tool
"""
BGPT structured scientific-evidence tool for ToolUniverse.
BGPT searches scientific papers and returns structured, full-text-derived
evidence fields for each study — methods, sample size/population, results,
limitations and biases, conflicts of interest, data/code availability,
quality scores, study blind spots, and a `how_to_falsify` statement — rather
than only titles and abstracts. This is complementary to the PubMed /
EuropePMC / OpenAlex discovery tools, which return bibliographic metadata.
API: https://bgpt.pro/api/mcp-search (POST, JSON). OpenAPI:
https://raw.githubusercontent.com/connerlambden/bgpt-mcp/main/openapi.yaml
License: MIT. The first 50 results are free (no key); set BGPT_API_KEY for
the paid tier once the free allowance is exhausted. Structured fields are
model-generated, so treat them as an appraisal aid, not curated ground truth.
Requested in mims-harvard/ToolUniverse issue #204.
"""
import os
from typing import Any, Dict
import requests
from .base_tool import BaseTool
from .tool_registry import register_tool
# Endpoint of the BGPT search API; this module sends POST requests with a JSON body to it.
BGPT_SEARCH_URL = "https://bgpt.pro/api/mcp-search"
[docs]
@register_tool("BGPTPaperEvidenceTool")
class BGPTPaperEvidenceTool(BaseTool):
    """
    Search scientific papers via BGPT and return structured study-evidence
    fields for critical appraisal.

    The BGPT_API_KEY environment variable (or an `api_key` argument) is
    optional — it is only needed once the free result allowance is used up.

    ``run`` never raises for the failure modes it handles; it always returns
    a dict whose ``status`` key is either ``"success"`` or ``"error"``.
    """

    def __init__(self, tool_config: Dict[str, Any]):
        """
        Initialise the tool from its configuration.

        Args:
            tool_config: Tool configuration dict. An optional ``"timeout"``
                entry (seconds) overrides the default HTTP timeout of 60.
        """
        super().__init__(tool_config)
        # Full-text evidence extraction is slow; allow more than the usual 30s.
        self.timeout: int = tool_config.get("timeout", 60)

    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute one BGPT search and return its results.

        Args:
            arguments: Call arguments. Recognised keys:

                * ``query`` (or alias ``search_keywords``): required,
                  non-blank search text.
                * ``num_results`` (or alias ``limit``): optional integer,
                  clamped to the range 1-100; defaults to 10.
                * ``days_back``: optional integer, raised to at least 1.
                * ``api_key``: optional; falls back to the ``BGPT_API_KEY``
                  environment variable.

        Returns:
            On success, ``{"status": "success", "data": [...], "metadata":
            {...}}``. On failure, ``{"status": "error", "error": "..."}``,
            with an additional ``"detail"`` key (first 500 characters of the
            response body) for HTTP-level and response-parsing failures.
        """
        query = arguments.get("query") or arguments.get("search_keywords")
        if not query or not str(query).strip():
            return {
                "status": "error",
                "error": "Parameter 'query' is required (a natural-language scientific search query).",
            }

        # An explicit None is treated as "not provided" (the same way
        # days_back is handled below). Identity checks rather than `or` are
        # used so a literal 0 still reaches the clamp instead of the default.
        raw_num_results = arguments.get("num_results")
        if raw_num_results is None:
            raw_num_results = arguments.get("limit")
        if raw_num_results is None:
            raw_num_results = 10
        try:
            num_results = int(raw_num_results)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers int(float("inf")).
            return {
                "status": "error",
                "error": "Parameter 'num_results' must be an integer (1-100).",
            }
        num_results = max(1, min(num_results, 100))

        payload: Dict[str, Any] = {"query": str(query), "num_results": num_results}

        days_back = arguments.get("days_back")
        if days_back is not None:
            try:
                payload["days_back"] = max(1, int(days_back))
            except (TypeError, ValueError, OverflowError):
                return {
                    "status": "error",
                    "error": "Parameter 'days_back' must be a positive integer (number of days).",
                }

        # api_key is optional: explicit argument first, then BGPT_API_KEY env var.
        api_key = arguments.get("api_key") or os.environ.get("BGPT_API_KEY")
        if api_key:
            payload["api_key"] = api_key

        try:
            response = requests.post(
                BGPT_SEARCH_URL, json=payload, timeout=self.timeout
            )
        except requests.Timeout:
            return {
                "status": "error",
                "error": f"BGPT request timed out after {self.timeout}s. Try a narrower query or fewer num_results.",
            }
        except requests.exceptions.RequestException as e:
            return {"status": "error", "error": f"Failed to reach BGPT: {str(e)}"}

        # 402 = free allowance exhausted; the user needs a paid-tier key.
        if response.status_code == 402:
            return {
                "status": "error",
                "error": (
                    "BGPT free result allowance is exhausted. Set the BGPT_API_KEY "
                    "environment variable (or pass 'api_key') to continue. See https://bgpt.pro/mcp/"
                ),
            }
        if response.status_code != 200:
            return {
                "status": "error",
                "error": f"BGPT API returned HTTP {response.status_code}",
                "detail": response.text[:500],
            }

        try:
            body = response.json()
        except ValueError:
            return {
                "status": "error",
                "error": "BGPT returned a non-JSON response.",
                "detail": response.text[:500],
            }

        # A non-dict body, a missing "results" key, or a JSON null are all
        # reported as an empty result list.
        results = body.get("results") if isinstance(body, dict) else None
        if results is None:
            results = []
        if not isinstance(results, list):
            # Any other type would break len() below or hand callers an
            # unexpected shape, so report it explicitly.
            return {
                "status": "error",
                "error": "BGPT returned an unexpected 'results' value (expected a list).",
                "detail": response.text[:500],
            }

        return {
            "status": "success",
            "data": results,
            "metadata": {
                "source": "BGPT (bgpt.pro)",
                "query": str(query),
                "returned": len(results),
                "evidence_fields_are_model_generated": True,
            },
        }