Source code for tooluniverse.doaj_tool

import re
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool


@register_tool("DOAJTool")
class DOAJTool(BaseTool):
    """
    Search DOAJ (Directory of Open Access Journals) articles and journals.

    Parameters (arguments):
        query (str): Query string (Lucene syntax supported by DOAJ)
        max_results (int): Max number of results (default 10, max 100)
        type (str): "articles" or "journals" (default: "articles")
    """
    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://doaj.org/api/search"
    def run(self, arguments=None):
        arguments = arguments or {}
        query = arguments.get("query")
        search_type = arguments.get("type", "articles")
        max_results = int(arguments.get("max_results", 10))

        if not query:
            return {"error": "`query` parameter is required."}
        if search_type not in ["articles", "journals"]:
            return {"error": "`type` must be 'articles' or 'journals'."}

        # URL-encode the query so spaces and slashes cannot break the path
        endpoint = f"{self.base_url}/{search_type}/{quote(query, safe='')}"
        params = {
            "pageSize": max(1, min(max_results, 100)),
        }
        try:
            resp = requests.get(endpoint, params=params, timeout=20)
            resp.raise_for_status()
            data = resp.json()
        except requests.RequestException as e:
            return {
                "error": "Network/API error calling DOAJ",
                "reason": str(e),
            }
        except ValueError:
            return {"error": "Failed to decode DOAJ response as JSON"}

        results = data.get("results", [])
        items = []
        if search_type == "articles":
            for r in results:
                b = r.get("bibjson", {})
                title = b.get("title")

                # Extract year (bibjson stores it as a string; keep the raw
                # value if it is not a valid integer, None if it is missing)
                year = b.get("year")
                try:
                    year = int(year) if year is not None else None
                except (TypeError, ValueError):
                    pass

                # Extract author information
                authors = [
                    a.get("name") for a in b.get("author", []) if a.get("name")
                ]

                # Extract DOI
                doi = None
                for i in b.get("identifier", []):
                    if i.get("type") == "doi":
                        doi = i.get("id")
                        break

                # Extract URL, preferring the fulltext link and falling back
                # to the first link that has a URL at all
                url = None
                for link_item in b.get("link", []):
                    if link_item.get("type") == "fulltext" and link_item.get("url"):
                        url = link_item.get("url")
                        break
                    if url is None:
                        url = link_item.get("url")

                # Extract journal information
                journal = (b.get("journal") or {}).get("title")

                # Extract abstract and strip any embedded HTML tags
                abstract = b.get("abstract")
                if abstract and isinstance(abstract, str):
                    abstract = re.sub(r"<[^>]+>", "", abstract).strip()

                # Extract keywords (subject entries may be dicts or strings)
                keywords = []
                subject_list = b.get("subject", [])
                if isinstance(subject_list, list):
                    for subject in subject_list:
                        if isinstance(subject, dict):
                            term = subject.get("term", "")
                            if term:
                                keywords.append(term)
                        elif isinstance(subject, str):
                            keywords.append(subject)

                # Citation count (DOAJ usually doesn't provide this)
                citations = 0

                # Open-access status (everything in DOAJ is open access)
                open_access = True

                # Extract article type
                article_type = b.get("type", "journal-article")

                # Extract publisher
                publisher = (b.get("journal") or {}).get("publisher")

                # Handle missing abstract
                if not abstract:
                    abstract = "Abstract not available"

                items.append(
                    {
                        "title": title or "Title not available",
                        "abstract": abstract,
                        "authors": (
                            authors
                            if authors
                            else "Author information not available"
                        ),
                        "year": year,
                        "doi": doi or "DOI not available",
                        "venue": journal or "Journal information not available",
                        "url": url or "URL not available",
                        "citations": citations,
                        "open_access": open_access,
                        "keywords": (
                            keywords if keywords else "Keywords not available"
                        ),
                        "article_type": article_type,
                        "publisher": (
                            publisher or "Publisher information not available"
                        ),
                        "source": "DOAJ",
                        "data_quality": {
                            "has_abstract": bool(
                                abstract
                                and abstract != "Abstract not available"
                            ),
                            "has_authors": bool(authors),
                            "has_journal": bool(journal),
                            "has_year": bool(year),
                            "has_doi": bool(doi),
                            # DOAJ usually doesn't provide citation counts
                            "has_citations": False,
                            "has_keywords": bool(keywords),
                            "has_url": bool(url),
                        },
                    }
                )
        else:
            for r in results:
                b = r.get("bibjson", {})
                title = b.get("title")
                publisher = b.get("publisher")

                # Extract electronic/print ISSNs
                eissn = None
                pissn = None
                for i in b.get("identifier", []):
                    if i.get("type") == "eissn":
                        eissn = i.get("id")
                    if i.get("type") == "pissn":
                        pissn = i.get("id")

                # First link with a URL is treated as the journal homepage
                homepage_url = None
                for link_item in b.get("link", []):
                    if link_item.get("url"):
                        homepage_url = link_item.get("url")
                        break

                # Subject entries may be dicts or strings, as above
                subjects = [
                    s.get("term")
                    for s in b.get("subject", [])
                    if isinstance(s, dict) and s.get("term")
                ]
                items.append(
                    {
                        "title": title,
                        "publisher": publisher,
                        "eissn": eissn,
                        "pissn": pissn,
                        "subjects": subjects,
                        "url": homepage_url,
                        "source": "DOAJ",
                    }
                )
        return items
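

# --- Usage sketch ---------------------------------------------------------
# A minimal, hypothetical example of calling the tool directly. It assumes
# BaseTool accepts a bare dict as tool_config (the real schema may require
# more keys) and that doaj.org is reachable; adjust to your setup.
if __name__ == "__main__":
    tool = DOAJTool(tool_config={"name": "DOAJTool"})
    hits = tool.run({"query": "open access publishing", "max_results": 3})
    if isinstance(hits, dict):  # run() returns an error dict on failure
        print("DOAJ error:", hits)
    else:
        for hit in hits:
            print(f"{hit['year']}: {hit['title']} ({hit['doi']})")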