# Source code for tooluniverse.wikipedia_tool
"""
Wikipedia tools for ToolUniverse using MediaWiki API.
This module provides access to Wikipedia articles, search, and content
extraction using the public MediaWiki API. No API key is required.
"""
import requests
from .base_tool import BaseTool
from .tool_registry import register_tool
@register_tool("WikipediaSearchTool")
class WikipediaSearchTool(BaseTool):
    """
    Search Wikipedia articles using the public MediaWiki API.

    No API key is required.

    Parameters (arguments):
        query (str): Search query string (required).
        limit (int): Maximum number of results to return
            (default: 10, clamped to the range 1..50).
        language (str): Wikipedia language code (default: "en").
    """

    def __init__(self, tool_config):
        super().__init__(tool_config)
        # The language is substituted per call, so one instance can query
        # any Wikipedia edition.
        self.base_url = "https://{language}.wikipedia.org/w/api.php"

    def run(self, arguments=None):
        """Run the search and return a result dict, or an ``error`` dict."""
        args = arguments or {}

        query = args.get("query", "").strip()
        if not query:
            return {"error": "`query` parameter is required."}

        language = args.get("language", "en")
        # Clamp the requested result count into the API-accepted range.
        limit = min(max(args.get("limit", 10), 1), 50)

        endpoint = self.base_url.format(language=language)
        params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "srlimit": limit,
            "format": "json",
            "srnamespace": 0,  # Only search in main namespace (articles)
        }
        headers = {
            "User-Agent": "ToolUniverse/1.0 (https://github.com)",
        }

        try:
            response = requests.get(
                endpoint, params=params, headers=headers, timeout=30
            )
            response.raise_for_status()
            payload = response.json()
        except requests.RequestException as e:
            return {
                "error": "Network/API error calling Wikipedia",
                "reason": str(e),
            }
        except (ValueError, KeyError) as e:
            return {
                "error": "Failed to parse Wikipedia API response",
                "reason": str(e),
            }

        if "error" in payload:
            return {"error": f"Wikipedia API error: {payload['error']}"}

        hits = payload.get("query", {}).get("search", [])
        results = [
            {
                "title": hit.get("title", ""),
                "snippet": hit.get("snippet", ""),
                "size": hit.get("size", 0),
                "wordcount": hit.get("wordcount", 0),
                "timestamp": hit.get("timestamp", ""),
            }
            for hit in hits
        ]
        return {
            "query": query,
            "language": language,
            "total_results": len(results),
            "results": results,
        }
@register_tool("WikipediaContentTool")
class WikipediaContentTool(BaseTool):
    """
    Extract content from Wikipedia articles using the MediaWiki API.

    Parameters (arguments):
        title (str): Article title (required).
        language (str): Wikipedia language code (default: "en").
        extract_type (str): Type of content - "intro" (first paragraph),
            "summary" (first few paragraphs), or "full" (entire article).
            Unrecognized values fall back to "summary" behavior.
            (default: "summary")
        max_chars (int): Maximum characters for summary/extract
            (default: 2000). Ignored when extract_type is "full".
    """

    def __init__(self, tool_config):
        super().__init__(tool_config)
        self.base_url = "https://{language}.wikipedia.org/w/api.php"

    def run(self, arguments=None):
        """Fetch the article and return its extract plus metadata.

        Returns a dict with title/pageid/url/content/content_length/
        extract_type (and up to 20 outgoing link titles when available),
        or a dict with an "error" key on failure.
        """
        arguments = arguments or {}
        title = arguments.get("title", "").strip()
        language = arguments.get("language", "en")
        extract_type = arguments.get("extract_type", "summary")
        max_chars = arguments.get("max_chars", 2000)
        if not title:
            return {"error": "`title` parameter is required."}
        api_url = self.base_url.format(language=language)

        # BUG FIX: MediaWiki treats the mere *presence* of a boolean
        # parameter as "true", so sending exintro=False (serialized as the
        # string "False") still restricted "full" extraction to the intro.
        # Disabled flags must be omitted entirely; None values are stripped
        # from `params` below.
        full_article = extract_type == "full"
        params = {
            "action": "query",
            "titles": title,
            "prop": "extracts|info|links",
            # "intro"/"summary" (and any unknown extract_type) limit the
            # extract to the lead section; "full" fetches the whole article.
            "exintro": None if full_article else True,
            "explaintext": True,  # plain text, no HTML markup
            # Truncate the extract; not applied for "full" articles.
            "exchars": None if full_article else max_chars,
            "format": "json",
            "inprop": "url",  # include the canonical page URL
        }
        # Remove disabled (None-valued) parameters before sending.
        params = {k: v for k, v in params.items() if v is not None}
        headers = {
            "User-Agent": "ToolUniverse/1.0 (https://github.com)",
        }
        try:
            resp = requests.get(api_url, params=params, headers=headers, timeout=30)
            resp.raise_for_status()
            data = resp.json()
            if "error" in data:
                return {"error": f"Wikipedia API error: {data['error']}"}
            pages = data.get("query", {}).get("pages", {})
            if not pages:
                return {"error": f"Article '{title}' not found."}
            # A single title was queried, so there is one page entry;
            # page id "-1" is MediaWiki's marker for a missing title.
            page_id = next(iter(pages))
            page_data = pages[page_id]
            if page_id == "-1":
                return {"error": f"Article '{title}' not found."}
            extract = page_data.get("extract", "")
            result = {
                "title": page_data.get("title", title),
                "pageid": int(page_id),
                "url": page_data.get("fullurl", ""),
                "content": extract,
                "content_length": len(extract),
                "extract_type": extract_type,
            }
            links = page_data.get("links", [])
            if links:
                # Cap the reported outgoing links at 20 titles.
                result["links"] = [link.get("title", "") for link in links[:20]]
            return result
        except requests.RequestException as e:
            return {
                "error": "Network/API error calling Wikipedia",
                "reason": str(e),
            }
        except (ValueError, KeyError) as e:
            return {
                "error": "Failed to parse Wikipedia API response",
                "reason": str(e),
            }
@register_tool("WikipediaSummaryTool")
class WikipediaSummaryTool(BaseTool):
    """
    Get a brief summary/introduction from a Wikipedia article.

    Convenience wrapper around WikipediaContentTool that always requests
    just the first paragraph(s) of an article.

    Parameters (arguments):
        title (str): Article title (required).
        language (str): Wikipedia language code (default: "en").
        max_chars (int): Maximum characters to return (default: 500).
    """

    def __init__(self, tool_config):
        super().__init__(tool_config)
        # Delegate all API work to the content tool.
        self.content_tool = WikipediaContentTool(tool_config)

    def run(self, arguments=None):
        """Return the intro extract for the requested article.

        BUG FIX: operates on a copy of ``arguments`` so the caller's dict
        is not mutated (previously ``extract_type`` and ``max_chars`` were
        written back into the caller-supplied mapping).
        """
        args = dict(arguments or {})
        # Force intro extraction and a shorter default length.
        args["extract_type"] = "intro"
        args["max_chars"] = args.get("max_chars", 500)
        return self.content_tool.run(args)