Source code for tooluniverse.openalex_tool

import requests
import urllib.parse
from .base_tool import BaseTool
from .tool_registry import register_tool


@register_tool("OpenAlexTool")
class OpenAlexTool(BaseTool):
    """
    Tool to retrieve literature from OpenAlex based on search keywords.

    All HTTP calls go to the OpenAlex ``/works`` endpoint. Each work returned
    by the API is flattened into a plain dictionary by
    :meth:`_extract_paper_info`.
    """

    # Largest value sent as the ``per-page`` query parameter.
    MAX_PER_PAGE = 200
    # Seconds to wait for OpenAlex before giving up; without a timeout
    # ``requests`` would block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30
    # Contact address sent with every request (OpenAlex "polite pool").
    MAILTO = "support@openalex.org"

    def __init__(self, tool_config):
        """
        Initialise the tool.

        Parameters:
            tool_config: Configuration object forwarded unchanged to
                ``BaseTool.__init__``.
        """
        super().__init__(tool_config)
        self.base_url = "https://api.openalex.org/works"

    def run(self, arguments):
        """
        Main entry point for the tool.

        Parameters:
            arguments (dict): May contain ``search_keywords``,
                ``max_results`` (default 10), ``year_from``, ``year_to``
                and ``open_access``.

        Returns:
            list | str: Whatever :meth:`search_literature` returns.
        """
        return self.search_literature(
            arguments.get("search_keywords"),
            arguments.get("max_results", 10),
            arguments.get("year_from"),
            arguments.get("year_to"),
            arguments.get("open_access"),
        )

    def search_literature(
        self,
        search_keywords,
        max_results=10,
        year_from=None,
        year_to=None,
        open_access=None,
    ):
        """
        Search for literature using OpenAlex API.

        Parameters:
            search_keywords (str): Keywords to search for in title, abstract,
                and content.
            max_results (int): Maximum number of results to return
                (default: 10). Clamped to the range 1..200.
            year_from (int): Start year for publication date filter
                (optional).
            year_to (int): End year for publication date filter (optional).
            open_access (bool): ``True`` for open access papers only,
                ``False`` for closed access only, ``None`` for no filter.

        Returns:
            list: List of dictionaries containing paper information, most
            cited first. On failure a ``str`` describing the error is
            returned instead.
        """
        if not search_keywords or not str(search_keywords).strip():
            return (
                "Error retrieving data from OpenAlex: "
                "'search_keywords' must be a non-empty string"
            )

        # The raw keywords are passed through: ``requests`` URL-encodes every
        # value in ``params`` itself, so quoting here would double-encode
        # them (a space would reach the server as "%2520").
        params = {
            "search": search_keywords,
            "per-page": self._clamp_per_page(max_results),
            "sort": "cited_by_count:desc",  # most cited first
            "mailto": self.MAILTO,
        }

        filters = []
        if year_from is not None and year_to is not None:
            filters.append(f"publication_year:{year_from}-{year_to}")
        elif year_from is not None:
            filters.append(f"from_publication_date:{year_from}-01-01")
        elif year_to is not None:
            filters.append(f"to_publication_date:{year_to}-12-31")

        # ``None`` means "do not filter", so test identity rather than
        # truthiness.
        if open_access is True:
            filters.append("is_oa:true")
        elif open_access is False:
            filters.append("is_oa:false")

        if filters:
            params["filter"] = ",".join(filters)

        try:
            papers = self._fetch_works(params)
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body on older versions
            # of ``requests``.
            return f"Error retrieving data from OpenAlex: {e}"

        print(
            f"[OpenAlex] Retrieved {len(papers)} papers for keywords: '{search_keywords}'"
        )
        return papers

    def _extract_paper_info(self, work):
        """
        Extract relevant information from a work object returned by OpenAlex
        API.

        Fields that are missing *or* present with a JSON ``null`` value are
        replaced by a placeholder, so nested lookups never fail on ``None``.

        Parameters:
            work (dict): Work object from OpenAlex API response.

        Returns:
            dict: Formatted paper information with the keys ``title``,
            ``abstract``, ``authors``, ``year``, ``organizations``, ``venue``,
            ``doi``, ``citation_count``, ``open_access``, ``pdf_url``,
            ``openalex_id`` and ``url``.
        """
        abstract = self._rebuild_abstract(work.get("abstract_inverted_index"))

        # A single pass collects author names and affiliations. A dict is
        # used as an ordered set so organizations keep first-seen order.
        authors = []
        organizations = {}
        for authorship in work.get("authorships") or []:
            author = authorship.get("author") or {}
            authors.append(author.get("display_name") or "Unknown Author")
            for institution in authorship.get("institutions") or []:
                org_name = (institution or {}).get("display_name")
                if org_name:
                    organizations.setdefault(org_name, None)

        primary_location = work.get("primary_location") or {}
        source = primary_location.get("source") or {}
        access = work.get("open_access") or {}
        doi = work.get("doi")

        return {
            "title": work.get("title") or "No title available",
            "abstract": abstract or "Abstract not available",
            "authors": authors,
            "year": work.get("publication_year") or "Year not available",
            "organizations": list(organizations),
            "venue": source.get("display_name") or "Unknown venue",
            "doi": doi or "No DOI",
            "citation_count": work.get("cited_by_count") or 0,
            "open_access": access.get("is_oa") or False,
            "pdf_url": access.get("oa_url"),
            "openalex_id": work.get("id", ""),
            # Prefer the DOI link; fall back to the OpenAlex identifier.
            "url": doi or work.get("id", ""),
        }

    def get_paper_by_doi(self, doi):
        """
        Retrieve a specific paper by its DOI.

        Parameters:
            doi (str): DOI of the paper to retrieve. A bare DOI
                (``10.1234/abc``), a ``doi:`` prefixed value and a full
                ``https://doi.org/...`` URL are all accepted.

        Returns:
            dict: Paper information or None if not found.
        """
        doi_url = self._normalize_doi(doi)
        if not doi_url:
            print(f"Error retrieving paper by DOI {doi}: no DOI provided")
            return None

        try:
            response = requests.get(
                f"{self.base_url}/{doi_url}",
                params={"mailto": self.MAILTO},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            work = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error retrieving paper by DOI {doi}: {e}")
            return None
        return self._extract_paper_info(work)

    def get_papers_by_author(self, author_name, max_results=10):
        """
        Retrieve papers by a specific author.

        Parameters:
            author_name (str): Name of the author to search for.
            max_results (int): Maximum number of results to return.
                Clamped to the range 1..200.

        Returns:
            list: List of papers by the author, most cited first. On failure
            a ``str`` describing the error is returned instead.
        """
        if not author_name or not str(author_name).strip():
            return (
                f"Error retrieving papers by author {author_name}: "
                "'author_name' must be a non-empty string"
            )

        params = {
            "filter": f"author.display_name.search:{author_name}",
            "per-page": self._clamp_per_page(max_results),
            "sort": "cited_by_count:desc",
            "mailto": self.MAILTO,
        }
        try:
            papers = self._fetch_works(params)
        except (requests.exceptions.RequestException, ValueError) as e:
            return f"Error retrieving papers by author {author_name}: {e}"

        print(
            f"[OpenAlex] Retrieved {len(papers)} papers by author: '{author_name}'"
        )
        return papers

    def _fetch_works(self, params):
        """Query the works endpoint and return the formatted result list."""
        response = requests.get(
            self.base_url, params=params, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        data = response.json()
        return [
            self._extract_paper_info(work) for work in data.get("results") or []
        ]

    @classmethod
    def _clamp_per_page(cls, max_results):
        """Coerce ``max_results`` to an int within 1..MAX_PER_PAGE."""
        if max_results is None:
            max_results = 10
        return max(1, min(int(max_results), cls.MAX_PER_PAGE))

    @staticmethod
    def _rebuild_abstract(inverted_index):
        """Turn a ``{word: [positions]}`` mapping back into running text."""
        if not inverted_index:
            return ""
        # Sorting (position, word) pairs restores reading order without
        # assuming any maximum abstract length.
        placed = sorted(
            (position, word)
            for word, positions in inverted_index.items()
            for position in positions or []
        )
        return " ".join(word for _, word in placed if word).strip()

    @staticmethod
    def _normalize_doi(doi):
        """Return ``doi`` as an ``https://doi.org/...`` URL, or "" if empty."""
        if not doi:
            return ""
        text = str(doi).strip()
        lowered = text.lower()
        known_prefixes = (
            "https://doi.org/",
            "http://doi.org/",
            "https://dx.doi.org/",
            "http://dx.doi.org/",
            "doi:",
        )
        for prefix in known_prefixes:
            if lowered.startswith(prefix):
                text = text[len(prefix):].strip()
                break
        return f"https://doi.org/{text}" if text else ""