Source code for tooluniverse.wikipedia_tool

"""
Wikipedia tools for ToolUniverse using MediaWiki API.

This module provides access to Wikipedia articles, search, and content
extraction using the public MediaWiki API. No API key is required.
"""

import requests
from .base_tool import BaseTool
from .tool_registry import register_tool


[docs] @register_tool("WikipediaSearchTool") class WikipediaSearchTool(BaseTool): """ Search Wikipedia articles using MediaWiki API. Parameters (arguments): query (str): Search query string limit (int): Maximum number of results to return (default: 10, max: 50) language (str): Wikipedia language code (default: "en") """
[docs] def __init__(self, tool_config): super().__init__(tool_config) self.base_url = "https://{language}.wikipedia.org/w/api.php"
[docs] def run(self, arguments=None): arguments = arguments or {} query = arguments.get("query", "").strip() limit = arguments.get("limit", 10) language = arguments.get("language", "en") if not query: return {"error": "`query` parameter is required."} # Validate limit limit = max(1, min(limit, 50)) api_url = self.base_url.format(language=language) params = { "action": "query", "list": "search", "srsearch": query, "srlimit": limit, "format": "json", "srnamespace": 0, # Only search in main namespace (articles) } headers = { "User-Agent": "ToolUniverse/1.0 (https://github.com)", } try: resp = requests.get(api_url, params=params, headers=headers, timeout=30) resp.raise_for_status() data = resp.json() if "error" in data: return {"error": f"Wikipedia API error: {data['error']}"} search_results = data.get("query", {}).get("search", []) results = [] for item in search_results: results.append( { "title": item.get("title", ""), "snippet": item.get("snippet", ""), "size": item.get("size", 0), "wordcount": item.get("wordcount", 0), "timestamp": item.get("timestamp", ""), } ) return { "query": query, "language": language, "total_results": len(results), "results": results, } except requests.RequestException as e: return { "error": "Network/API error calling Wikipedia", "reason": str(e), } except (ValueError, KeyError) as e: return { "error": "Failed to parse Wikipedia API response", "reason": str(e), }
[docs] @register_tool("WikipediaContentTool") class WikipediaContentTool(BaseTool): """ Extract content from Wikipedia articles using MediaWiki API. Parameters (arguments): title (str): Article title (required) language (str): Wikipedia language code (default: "en") extract_type (str): Type of content - "intro" (first paragraph), "summary" (first few paragraphs), or "full" (entire article) (default: "summary") max_chars (int): Maximum characters for summary/extract (default: 2000) """
[docs] def __init__(self, tool_config): super().__init__(tool_config) self.base_url = "https://{language}.wikipedia.org/w/api.php"
[docs] def run(self, arguments=None): arguments = arguments or {} title = arguments.get("title", "").strip() language = arguments.get("language", "en") extract_type = arguments.get("extract_type", "summary") max_chars = arguments.get("max_chars", 2000) if not title: return {"error": "`title` parameter is required."} api_url = self.base_url.format(language=language) # Determine what to extract if extract_type == "intro": exintro = True explaintext = True elif extract_type == "summary": exintro = True explaintext = True elif extract_type == "full": exintro = False explaintext = True else: exintro = True explaintext = True exchars = max_chars if extract_type != "full" else None params = { "action": "query", "titles": title, "prop": "extracts|info|links", "exintro": exintro, "explaintext": explaintext, "exchars": exchars, "format": "json", "inprop": "url", } # Remove None values params = {k: v for k, v in params.items() if v is not None} headers = { "User-Agent": "ToolUniverse/1.0 (https://github.com)", } try: resp = requests.get(api_url, params=params, headers=headers, timeout=30) resp.raise_for_status() data = resp.json() if "error" in data: return {"error": f"Wikipedia API error: {data['error']}"} pages = data.get("query", {}).get("pages", {}) if not pages: return {"error": f"Article '{title}' not found."} # Get first page (should only be one) page_id = list(pages.keys())[0] page_data = pages[page_id] if page_id == "-1": return {"error": f"Article '{title}' not found."} extract = page_data.get("extract", "") fullurl = page_data.get("fullurl", "") links = page_data.get("links", []) result = { "title": page_data.get("title", title), "pageid": int(page_id), "url": fullurl, "content": extract, "content_length": len(extract), "extract_type": extract_type, } # Add links if available if links: # Limit to 20 links result["links"] = [link.get("title", "") for link in links[:20]] return result except requests.RequestException as e: return { "error": "Network/API error calling Wikipedia", "reason": str(e), } except (ValueError, KeyError) as e: return { "error": "Failed to parse Wikipedia API response", "reason": str(e), }
[docs] @register_tool("WikipediaSummaryTool") class WikipediaSummaryTool(BaseTool): """ Get a brief summary/introduction from a Wikipedia article. This is a convenience tool that extracts just the first paragraph(s) of an article. Parameters (arguments): title (str): Article title (required) language (str): Wikipedia language code (default: "en") max_chars (int): Maximum characters to return (default: 500) """
[docs] def __init__(self, tool_config): super().__init__(tool_config) self.content_tool = WikipediaContentTool(tool_config)
[docs] def run(self, arguments=None): arguments = arguments or {} # Override extract_type to always get intro arguments["extract_type"] = "intro" arguments["max_chars"] = arguments.get("max_chars", 500) return self.content_tool.run(arguments)