tooluniverse.datagov_tool 源代码

"""
Data.gov (CKAN) API tool for ToolUniverse.

Searches the US federal open data catalog (catalog.data.gov) for datasets
from EPA, CDC, Census, NIH, USDA, NOAA, and 100+ other federal agencies.

API Documentation: https://catalog.data.gov/api/3
"""

import requests
from .base_tool import BaseTool
from .tool_registry import register_tool

DATAGOV_BASE = "https://catalog.data.gov/api/3/action/package_search"


def _datagov_error(message, details):
    """Build the error envelope returned by ``DataGovTool.run``."""
    return {
        "status": "error",
        "error": {
            "message": message,
            "details": details,
        },
    }


def _summarize_datagov_package(pkg):
    """Reduce one CKAN package record to the fields exposed by the tool."""
    org = pkg.get("organization") or {}
    # Only the first 10 resources are reported, to keep the payload bounded.
    resources = [
        {
            "name": res.get("name"),
            "url": res.get("url"),
            "format": res.get("format") or None,
            "description": res.get("description") or None,
        }
        for res in (pkg.get("resources") or [])[:10]
    ]
    return {
        "title": pkg.get("title", ""),
        # Descriptions are truncated to 500 characters; empty becomes None.
        "description": (pkg.get("notes") or "")[:500] or None,
        "organization": org.get("name"),
        "organization_title": org.get("title"),
        "metadata_modified": pkg.get("metadata_modified"),
        "tags": [t.get("name", "") for t in (pkg.get("tags") or [])],
        "resources": resources,
        "url": pkg.get("url") or None,
    }


@register_tool("DataGovTool")
class DataGovTool(BaseTool):
    """Search US federal open data catalog (Data.gov) for datasets."""

    def run(self, arguments=None):
        """Query the Data.gov ``package_search`` endpoint and summarize hits.

        Args:
            arguments: Optional mapping with the keys:
                ``query`` (required, non-empty string) - search text;
                ``organization`` (optional) - value placed in an
                ``organization:<value>`` filter query;
                ``rows`` (optional, default 10) - number of results,
                clamped to the range 1..100.

        Returns:
            A dict with ``"status": "success"`` plus ``data`` and
            ``metadata`` on success, or ``"status": "error"`` plus an
            ``error`` dict (``message``, ``details``) when the input is
            invalid, the HTTP request fails, the response is not valid JSON,
            or the API reports ``success`` as false. Malformed input is
            reported through the error dict rather than raised.
        """
        arguments = arguments or {}

        # A missing, null, or non-string query is reported as a structured
        # error instead of crashing on ``.strip()``.
        raw_query = arguments.get("query")
        query = raw_query.strip() if isinstance(raw_query, str) else ""
        if not query:
            return _datagov_error(
                "Missing required parameter: query",
                "Provide a search query string.",
            )

        organization = arguments.get("organization")

        # An explicit ``None`` falls back to the default page size; anything
        # that cannot be converted to an int is reported, not raised.
        raw_rows = arguments.get("rows")
        if raw_rows is None:
            raw_rows = 10
        try:
            rows = max(1, min(int(raw_rows), 100))
        except (TypeError, ValueError, OverflowError):
            return _datagov_error(
                "Invalid parameter: rows",
                f"rows must be an integer between 1 and 100, got {raw_rows!r}.",
            )

        params = {"q": query, "rows": rows}
        if organization:
            params["fq"] = f"organization:{organization}"

        try:
            resp = requests.get(DATAGOV_BASE, params=params, timeout=30)
            resp.raise_for_status()
            body = resp.json()
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers a non-JSON body on requests versions where
            # the JSON decode error is not a RequestException subclass.
            return _datagov_error("Data.gov API request failed", str(exc))

        if not isinstance(body, dict):
            return _datagov_error(
                "Data.gov API returned an error",
                "Unknown error",
            )
        if not body.get("success"):
            return _datagov_error(
                "Data.gov API returned an error",
                str(body.get("error", "Unknown error")),
            )

        # Tolerate a null or missing "result" / "results" in the payload.
        result = body.get("result")
        if not isinstance(result, dict):
            result = {}
        datasets = [
            _summarize_datagov_package(pkg)
            for pkg in (result.get("results") or [])
        ]

        return {
            "status": "success",
            "data": {
                "query": query,
                "organization": organization,
                "total_count": result.get("count", 0),
                "returned": len(datasets),
                "datasets": datasets,
            },
            "metadata": {
                "source": "Data.gov (CKAN)",
                "api": DATAGOV_BASE,
            },
        }