Source code for tooluniverse.openaire_tool
import requests
from .base_tool import BaseTool
from .tool_registry import register_tool
[docs]
@register_tool("OpenAIRETool")
class OpenAIRETool(BaseTool):
"""
Search OpenAIRE Explore for research products (publications by default).
Parameters (arguments):
query (str): Query string
max_results (int): Max number of results (default 10, max 100)
type (str): product type filter: publications | datasets | software
"""
[docs]
def __init__(self, tool_config):
super().__init__(tool_config)
self.base_url = "https://api.openaire.eu/search/publications"
[docs]
def run(self, arguments=None):
arguments = arguments or {}
query = arguments.get("query")
max_results = int(arguments.get("max_results", 10))
prod_type = arguments.get("type", "publications")
if not query:
return {
"status": "success",
"data": {
"status": "error",
"error": "`query` parameter is required.",
"query": "",
"type": prod_type,
"total_results": 0,
"results": [],
},
}
endpoint = self._endpoint_for_type(prod_type)
if endpoint is None:
return {
"status": "success",
"data": {
"status": "error",
"error": "Unsupported type. Use publications/datasets/software.",
"query": query,
"type": prod_type,
"total_results": 0,
"results": [],
},
}
# OpenAIRE's legacy `query` param now returns HTTP 400; the current
# search API (publications, datasets, software, projects) uses
# `keywords` for free-text search.
params = {
"format": "json",
"size": max(1, min(max_results, 100)),
"keywords": query,
}
try:
resp = requests.get(endpoint, params=params, timeout=20)
resp.raise_for_status()
data = resp.json()
except requests.RequestException as e:
return {
"status": "success",
"data": {
"status": "error",
"error": "Network/API error calling OpenAIRE",
"reason": str(e),
"query": query,
"type": prod_type,
"total_results": 0,
"results": [],
},
}
except ValueError:
return {
"status": "success",
"data": {
"status": "error",
"error": "Failed to decode OpenAIRE response as JSON",
"query": query,
"type": prod_type,
"total_results": 0,
"results": [],
},
}
if prod_type == "projects":
results = self._normalize_projects(data)
else:
results = self._normalize(data, prod_type)
# Total reported by OpenAIRE (across all pages), not just this page
total_results = self._total_from_header(data)
if total_results is None:
total_results = len(results)
return {
"status": "success",
"data": {
"status": "success",
"query": query,
"type": prod_type,
"total_results": total_results,
"returned": len(results),
"results": results,
},
}
[docs]
def _endpoint_for_type(self, prod_type):
if prod_type == "publications":
return "https://api.openaire.eu/search/publications"
if prod_type == "datasets":
return "https://api.openaire.eu/search/datasets"
if prod_type == "software":
return "https://api.openaire.eu/search/software"
if prod_type == "projects":
return "https://api.openaire.eu/search/projects"
return None
[docs]
@staticmethod
def _total_from_header(data):
"""Extract the total result count from the OpenAIRE response header."""
try:
total = data.get("response", {}).get("header", {}).get("total")
if isinstance(total, dict):
total = total.get("$")
return int(total) if total is not None else None
except (ValueError, TypeError, AttributeError):
return None
[docs]
@staticmethod
def _scalar(value):
"""OpenAIRE wraps scalars as {'$': value}; unwrap to the plain value."""
if isinstance(value, dict):
return value.get("$")
if isinstance(value, list):
for item in value:
unwrapped = OpenAIRETool._scalar(item)
if unwrapped is not None:
return unwrapped
return None
return value
[docs]
def _normalize_projects(self, data):
"""Normalize OpenAIRE funded-project / grant search results."""
results = []
try:
items = data.get("response", {}).get("results", {}).get("result", [])
except Exception:
items = []
if isinstance(items, dict):
items = [items]
for it in items:
metadata = it.get("metadata", {}) if isinstance(it, dict) else {}
project = metadata.get("oaf:entity", {}).get("oaf:project", {})
if not isinstance(project, dict):
continue
# Funder + funding stream live in the fundingtree
funding_tree = project.get("fundingtree", {})
if isinstance(funding_tree, list):
funding_tree = funding_tree[0] if funding_tree else {}
funder = (
funding_tree.get("funder", {}) if isinstance(funding_tree, dict) else {}
)
level0 = (
funding_tree.get("funding_level_0", {})
if isinstance(funding_tree, dict)
else {}
)
results.append(
{
"code": self._scalar(project.get("code")),
"acronym": self._scalar(project.get("acronym")),
"title": self._scalar(project.get("title")),
"funder": self._scalar(funder.get("name"))
if isinstance(funder, dict)
else None,
"funder_shortname": self._scalar(funder.get("shortname"))
if isinstance(funder, dict)
else None,
"funding_stream": self._scalar(level0.get("name"))
if isinstance(level0, dict)
else None,
"start_date": self._scalar(project.get("startdate")),
"end_date": self._scalar(project.get("enddate")),
"funded_amount": self._scalar(project.get("fundedamount")),
"total_cost": self._scalar(project.get("totalcost")),
"currency": self._scalar(project.get("currency")),
"website": self._scalar(project.get("websiteurl")),
"type": "projects",
"source": "OpenAIRE",
}
)
return results
[docs]
def _normalize(self, data, prod_type):
results = []
# OpenAIRE JSON has a root 'response' with 'results' → 'result' list
try:
items = data.get("response", {}).get("results", {}).get("result", [])
except Exception:
items = []
for it in items:
# header may contain identifiers, not used presently
_ = it.get("header", {}) if isinstance(it.get("header"), dict) else {}
metadata = (
it.get("metadata", {}) if isinstance(it.get("metadata"), dict) else {}
)
title = None
authors = []
year = None
doi = None
url = None
# Titles can be nested in 'oaf:result' structure
result_obj = metadata.get("oaf:result", {})
if isinstance(result_obj, dict):
t = result_obj.get("title")
if isinstance(t, list) and t:
title = t[0].get("$")
elif isinstance(t, dict):
title = t.get("$")
# Authors
creators = result_obj.get("creator", [])
if isinstance(creators, list):
for c in creators:
name = c.get("$")
if name:
authors.append(name)
# Year
date_obj = result_obj.get("dateofacceptance") or result_obj.get("date")
if isinstance(date_obj, dict):
year = date_obj.get("year") or date_obj.get("$")
# DOI and URL
pid = result_obj.get("pid", [])
if isinstance(pid, list):
for p in pid:
if p.get("@classid") == "doi":
doi = p.get("$")
bestaccessright = result_obj.get("bestaccessright", {})
if isinstance(bestaccessright, dict):
url_value = bestaccessright.get("$")
if url_value:
url = url_value
results.append(
{
"title": title,
"authors": authors,
"year": year,
"doi": doi,
"url": url,
"type": prod_type,
"source": "OpenAIRE",
}
)
return results