tooluniverse.openaire_dataset_tool 源代码
"""
OpenAIRE dataset search tool for ToolUniverse.
Searches the OpenAIRE dataset index for European open science datasets
with optional funder and country filters.
API Documentation: https://api.openaire.eu/
"""
import requests
from .base_tool import BaseTool
from .tool_registry import register_tool
OPENAIRE_DATASETS_URL = "https://api.openaire.eu/search/datasets"
[文档]
@register_tool("OpenAIREDatasetTool")
class OpenAIREDatasetTool(BaseTool):
"""Search OpenAIRE for European research datasets."""
[文档]
def run(self, arguments=None):
arguments = arguments or {}
keywords = (arguments.get("keywords") or "").strip()
funder = arguments.get("funder")
country = arguments.get("country")
size = max(1, min(int(arguments.get("size", 10)), 100))
if not keywords:
return {
"status": "error",
"error": {
"message": "Missing required parameter: keywords",
"details": "Provide search keywords for dataset discovery.",
},
}
params = {"keywords": keywords, "format": "json", "size": size}
if funder:
params["funder"] = funder
if country:
params["country"] = country
try:
resp = requests.get(OPENAIRE_DATASETS_URL, params=params, timeout=60)
resp.raise_for_status()
body = resp.json()
except requests.RequestException as exc:
return {
"status": "error",
"error": {
"message": "OpenAIRE API request failed",
"details": str(exc),
},
}
header = body.get("response", {}).get("header", {})
total = int(header.get("total", {}).get("$", 0))
items = body.get("response", {}).get("results", {}).get("result", []) or []
datasets = []
for item in items:
entity = (
item.get("metadata", {}).get("oaf:entity", {}).get("oaf:result", {})
)
if not entity:
# Fallback: some responses nest directly under metadata
entity = item.get("metadata", {}).get("oaf:result", {})
if not entity:
continue
title = self._extract_text(entity.get("title"))
description = self._extract_text(entity.get("description"))
if description and len(description) > 500:
description = description[:497] + "..."
creators = self._extract_text_list(entity.get("creator"))
date = self._extract_text(entity.get("dateofacceptance"))
publisher = self._extract_text(entity.get("publisher"))
access = entity.get("bestaccessright", {})
access_rights = (
access.get("@classname") if isinstance(access, dict) else None
)
doi = self._extract_doi(entity.get("pid"))
subjects = self._extract_text_list(entity.get("subject"))
datasets.append(
{
"title": title,
"description": description,
"creators": creators,
"date": date,
"publisher": publisher,
"access_rights": access_rights,
"doi": doi,
"subjects": subjects,
}
)
return {
"status": "success",
"data": {
"keywords": keywords,
"funder": funder,
"country": country,
"total_count": total,
"returned": len(datasets),
"datasets": datasets,
},
"metadata": {
"source": "OpenAIRE",
"api": OPENAIRE_DATASETS_URL,
},
}
[文档]
@staticmethod
def _extract_text(field):
"""Extract first text value from OpenAIRE's {'$': value} or list-of-dicts."""
if field is None:
return None
if isinstance(field, str):
return field
if isinstance(field, dict):
return field.get("$")
if isinstance(field, list):
for item in field:
val = item.get("$") if isinstance(item, dict) else item
if val:
return val
return None
[文档]
@staticmethod
def _extract_text_list(field):
"""Extract all text values from OpenAIRE's {'$': value} or list-of-dicts."""
if field is None:
return []
if isinstance(field, dict):
val = field.get("$")
return [val] if val else []
if isinstance(field, list):
return [
v
for item in field
for v in [item.get("$") if isinstance(item, dict) else item]
if v
]
return []
[文档]
@staticmethod
def _extract_doi(pid_field):
"""Extract DOI from OpenAIRE pid field."""
if pid_field is None:
return None
pids = [pid_field] if isinstance(pid_field, dict) else (pid_field or [])
for p in pids:
if isinstance(p, dict) and p.get("@classid") == "doi":
return p.get("$")
return None