Source code for tooluniverse.doaj_tool

import re
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool


@register_tool("DOAJTool")
class DOAJTool(BaseTool):
    """
    Search DOAJ (Directory of Open Access Journals) articles and journals.

    Parameters (arguments):
        query (str): Query string (Lucene syntax supported by DOAJ)
        max_results (int): Max number of results (default 10, max 100)
        type (str): "articles" or "journals" (default: "articles")
    """
    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://doaj.org/api/search"
    def run(self, arguments=None):
        arguments = arguments or {}
        query = arguments.get("query")
        search_type = arguments.get("type", "articles")
        max_results = int(arguments.get("max_results", 10))

        if not query:
            return {"error": "`query` parameter is required."}
        if search_type not in ["articles", "journals"]:
            return {"error": "`type` must be 'articles' or 'journals'."}

        # URL-encode the query so spaces and slashes cannot break the path
        endpoint = f"{self.base_url}/{search_type}/{quote(query, safe='')}"
        params = {
            "pageSize": max(1, min(max_results, 100)),
        }
        try:
            resp = requests.get(endpoint, params=params, timeout=20)
            resp.raise_for_status()
            data = resp.json()
        except requests.RequestException as e:
            return {
                "error": "Network/API error calling DOAJ",
                "reason": str(e),
            }
        except ValueError:
            return {"error": "Failed to decode DOAJ response as JSON"}

        results = data.get("results", [])
        items = []
        if search_type == "articles":
            for r in results:
                b = r.get("bibjson", {})
                title = b.get("title")

                # Extract year (bibjson stores it as a string; keep the raw
                # value if it is not a valid integer, None if it is missing)
                year = b.get("year")
                try:
                    year = int(year) if year is not None else None
                except (TypeError, ValueError):
                    pass

                # Extract author information
                authors = [
                    a.get("name") for a in b.get("author", []) if a.get("name")
                ]

                # Extract DOI
                doi = None
                for i in b.get("identifier", []):
                    if i.get("type") == "doi":
                        doi = i.get("id")
                        break

                # Extract URL, preferring the fulltext link and falling back
                # to the first link that has a URL at all
                url = None
                for link_item in b.get("link", []):
                    if link_item.get("type") == "fulltext" and link_item.get("url"):
                        url = link_item.get("url")
                        break
                    if url is None:
                        url = link_item.get("url")

                # Extract journal information
                journal = (b.get("journal") or {}).get("title")

                # Extract abstract and strip any embedded HTML tags
                abstract = b.get("abstract")
                if abstract and isinstance(abstract, str):
                    abstract = re.sub(r"<[^>]+>", "", abstract).strip()

                # Extract keywords (subject entries may be dicts or strings)
                keywords = []
                subject_list = b.get("subject", [])
                if isinstance(subject_list, list):
                    for subject in subject_list:
                        if isinstance(subject, dict):
                            term = subject.get("term", "")
                            if term:
                                keywords.append(term)
                        elif isinstance(subject, str):
                            keywords.append(subject)

                # Citation count (DOAJ usually doesn't provide this)
                citations = 0

                # Open-access status (everything in DOAJ is open access)
                open_access = True

                # Extract article type
                article_type = b.get("type", "journal-article")

                # Extract publisher
                publisher = (b.get("journal") or {}).get("publisher")

                # Handle missing abstract
                if not abstract:
                    abstract = "Abstract not available"

                items.append(
                    {
                        "title": title or "Title not available",
                        "abstract": abstract,
                        "authors": (
                            authors
                            if authors
                            else "Author information not available"
                        ),
                        "year": year,
                        "doi": doi or "DOI not available",
                        "venue": journal or "Journal information not available",
                        "url": url or "URL not available",
                        "citations": citations,
                        "open_access": open_access,
                        "keywords": (
                            keywords if keywords else "Keywords not available"
                        ),
                        "article_type": article_type,
                        "publisher": (
                            publisher or "Publisher information not available"
                        ),
                        "source": "DOAJ",
                        "data_quality": {
                            "has_abstract": bool(
                                abstract
                                and abstract != "Abstract not available"
                            ),
                            "has_authors": bool(authors),
                            "has_journal": bool(journal),
                            "has_year": bool(year),
                            "has_doi": bool(doi),
                            # DOAJ usually doesn't provide citation counts
                            "has_citations": False,
                            "has_keywords": bool(keywords),
                            "has_url": bool(url),
                        },
                    }
                )
        else:
            for r in results:
                b = r.get("bibjson", {})
                title = b.get("title")
                publisher = b.get("publisher")

                # Extract electronic/print ISSNs
                eissn = None
                pissn = None
                for i in b.get("identifier", []):
                    if i.get("type") == "eissn":
                        eissn = i.get("id")
                    if i.get("type") == "pissn":
                        pissn = i.get("id")

                # First link with a URL is treated as the journal homepage
                homepage_url = None
                for link_item in b.get("link", []):
                    if link_item.get("url"):
                        homepage_url = link_item.get("url")
                        break

                # Subject entries may be dicts or strings, as above
                subjects = [
                    s.get("term")
                    for s in b.get("subject", [])
                    if isinstance(s, dict) and s.get("term")
                ]
                items.append(
                    {
                        "title": title,
                        "publisher": publisher,
                        "eissn": eissn,
                        "pissn": pissn,
                        "subjects": subjects,
                        "url": homepage_url,
                        "source": "DOAJ",
                    }
                )
        return items
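

# --- Usage sketch ---------------------------------------------------------
# A minimal, hypothetical example of calling the tool directly. It assumes
# BaseTool accepts a bare dict as tool_config (the real schema may require
# more keys) and that doaj.org is reachable; adjust to your setup.
if __name__ == "__main__":
    tool = DOAJTool(tool_config={"name": "DOAJTool"})
    hits = tool.run({"query": "open access publishing", "max_results": 3})
    if isinstance(hits, dict):  # run() returns an error dict on failure
        print("DOAJ error:", hits)
    else:
        for hit in hits:
            print(f"{hit['year']}: {hit['title']} ({hit['doi']})")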