Source code for tooluniverse.odphp_tool

import re
import requests
from typing import Dict, Any, Optional, List
from .base_tool import BaseTool
from .tool_registry import register_tool

# Optional but recommended: text extraction for HTML
try:
    from bs4 import BeautifulSoup  # pip install beautifulsoup4
except ImportError:
    BeautifulSoup = None  # We’ll guard uses so the tool still loads

ODPHP_BASE_URL = "https://odphp.health.gov/myhealthfinder/api/v4"



[docs]
class ODPHPRESTTool(BaseTool):
    """Base class for ODPHP (MyHealthfinder) REST API tools."""


[docs]
    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.endpoint = tool_config["fields"]["endpoint"]


    def _make_request(self, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        url = f"{ODPHP_BASE_URL}{self.endpoint}"
        try:
            resp = requests.get(url, params=params, timeout=30)
            resp.raise_for_status()
            data = resp.json()
            return {
                "data": data.get("Result"),
                "metadata": {
                    "source": "ODPHP MyHealthfinder",
                    "endpoint": url,
                    "query": params,
                },
            }
        except requests.exceptions.RequestException as e:
            return {"error": f"Request failed: {str(e)}"}
        except ValueError as e:
            return {"error": f"Failed to parse JSON: {str(e)}"}



def _sections_array(resource: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Tolerant accessor for the sections array.
    Data sometimes uses Sections.Section (capital S) and sometimes Sections.section (lowercase).
    """
    sect = resource.get("Sections") or {}
    arr = sect.get("Section")
    if not isinstance(arr, list):
        arr = sect.get("section")
    return arr if isinstance(arr, list) else []


def _strip_html_to_text(html: str) -> str:
    if not html:
        return ""
    if BeautifulSoup is None:
        # fallback: very light tag remover
        text = re.sub(r"<[^>]+>", " ", html)
        return re.sub(r"\s+", " ", text).strip()
    soup = BeautifulSoup(html, "html.parser")
    # remove scripts/styles
    for t in soup(["script", "style", "noscript"]):
        t.decompose()
    text = soup.get_text("\n", strip=True)
    text = re.sub(r"\n{2,}", "\n\n", text)
    return text.strip()



[docs]
@register_tool("ODPHPMyHealthfinder")
class ODPHPMyHealthfinder(ODPHPRESTTool):
    """Search for demographic-specific health recommendations (MyHealthfinder)."""


[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        params: Dict[str, Any] = {}
        if "lang" in arguments:
            params["lang"] = arguments["lang"]
        if "age" in arguments:
            params["age"] = arguments["age"]
        if "sex" in arguments:
            params["sex"] = arguments["sex"]
        if "pregnant" in arguments:
            params["pregnant"] = arguments["pregnant"]

        res = self._make_request(params)

        # Optional: attach PlainSections if requested
        if (
            isinstance(res, dict)
            and not res.get("error")
            and arguments.get("strip_html")
        ):
            data = res.get("data") or {}
            resources = (
                ((data.get("Resources") or {}).get("All") or {}).get("Resource")
            ) or []
            if isinstance(resources, list):
                for r in resources:
                    plain = []
                    for sec in _sections_array(r):
                        plain.append(
                            {
                                "Title": sec.get("Title", ""),
                                "PlainContent": _strip_html_to_text(
                                    sec.get("Content", "")
                                ),
                            }
                        )
                    if plain:
                        r["PlainSections"] = plain
        return res





[docs]
@register_tool("ODPHPItemList")
class ODPHPItemList(ODPHPRESTTool):
    """Retrieve list of topics or categories."""


[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        params: Dict[str, Any] = {}
        if "lang" in arguments:
            params["lang"] = arguments["lang"]
        if "type" in arguments:
            params["type"] = arguments["type"]
        return self._make_request(params)





[docs]
@register_tool("ODPHPTopicSearch")
class ODPHPTopicSearch(ODPHPRESTTool):
    """Search for health topics by ID, category, or keyword."""


[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        params: Dict[str, Any] = {}
        if "lang" in arguments:
            params["lang"] = arguments["lang"]
        if "topicId" in arguments:
            params["topicId"] = arguments["topicId"]
        if "categoryId" in arguments:
            params["categoryId"] = arguments["categoryId"]
        if "keyword" in arguments:
            params["keyword"] = arguments["keyword"]

        res = self._make_request(params)

        # Optional: attach PlainSections if requested
        if (
            isinstance(res, dict)
            and not res.get("error")
            and arguments.get("strip_html")
        ):
            data = res.get("data") or {}
            resources = ((data.get("Resources") or {}).get("Resource")) or []
            if isinstance(resources, list):
                for r in resources:
                    plain = []
                    for sec in _sections_array(r):
                        plain.append(
                            {
                                "Title": sec.get("Title", ""),
                                "PlainContent": _strip_html_to_text(
                                    sec.get("Content", "")
                                ),
                            }
                        )
                    if plain:
                        r["PlainSections"] = plain
        return res





[docs]
@register_tool("ODPHPOutlinkFetch")
class ODPHPOutlinkFetch(BaseTool):
    """
    Fetch article pages referenced by AccessibleVersion / RelatedItems.Url and return readable text.
    - HTML: extracts main/article/body text; strips nav/aside/footer/script/style.
    - PDF or non-HTML: returns metadata + URL so the agent can surface it.
    """


[docs]
    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.timeout = 30



[docs]
    def _extract_text(self, html: str) -> Dict[str, str]:
        if BeautifulSoup is None:
            # fallback: crude extraction
            title = ""
            # attempt to find <title>
            m = re.search(r"<title[^>]*>(.*?)</title>", html, flags=re.I | re.S)
            if m:
                title = re.sub(r"\s+", " ", m.group(1)).strip()
            text = re.sub(r"<[^>]+>", " ", html)
            text = re.sub(r"\s+", " ", text).strip()
            return {"title": title, "text": text}

        soup = BeautifulSoup(html, "html.parser")
        # remove non-content
        for tag in soup(["script", "style", "noscript", "footer", "nav", "aside"]):
            tag.decompose()

        candidate = soup.find("main") or soup.find("article") or soup.body or soup
        title = ""
        # prefer main/article heading, else <title>
        h = candidate.find(["h1", "h2"]) if candidate else None
        if h:
            title = h.get_text(" ", strip=True)
        elif soup.title and soup.title.string:
            title = soup.title.string.strip()

        text = (
            candidate.get_text("\n", strip=True)
            if candidate
            else soup.get_text("\n", strip=True)
        )
        text = re.sub(r"\n{2,}", "\n\n", text)
        return {"title": title, "text": text}



[docs]
    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
        urls: List[str] = arguments.get("urls", [])
        max_chars: Optional[int] = arguments.get("max_chars")
        return_html: bool = bool(arguments.get("return_html", False))

        if not urls or not isinstance(urls, list):
            return {"error": "Missing required parameter 'urls' (array of 1–3 URLs)."}

        out: List[Dict[str, Any]] = []
        for u in urls[:3]:
            try:
                resp = requests.get(u, timeout=self.timeout, allow_redirects=True)
                ct = resp.headers.get("Content-Type", "")
                item: Dict[str, Any] = {
                    "url": u,
                    "status": resp.status_code,
                    "content_type": ct,
                }

                if "text/html" in ct or (not ct and resp.text.startswith("<!")):
                    ex = self._extract_text(resp.text)
                    if isinstance(max_chars, int) and max_chars > 0:
                        ex["text"] = ex["text"][:max_chars]
                    item.update(ex)
                    if return_html:
                        item["html"] = resp.text
                elif "pdf" in ct or u.lower().endswith(".pdf"):
                    item["title"] = "(PDF Document)"
                    item["text"] = f"[PDF file: {u}]"
                else:
                    item["title"] = ""
                    item["text"] = ""
                out.append(item)
            except requests.exceptions.RequestException as e:
                out.append(
                    {
                        "url": u,
                        "status": 0,
                        "content_type": "",
                        "title": "",
                        "text": "",
                        "error": str(e),
                    }
                )

        return {"results": out, "metadata": {"source": "ODPHP OutlinkFetch"}}