Source code for tooluniverse.pubchem_tool

# pubchem_tool.py

import re
from urllib.parse import quote

import requests

from .base_tool import BaseTool
from .tool_registry import register_tool

# Base URL for PubChem PUG-REST. PubChemRESTTool prepends it to the filled-in
# endpoint path unless the tool config enables ``use_pugview``.
PUBCHEM_BASE_URL = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"

# Base URL for PubChem PUG-View. PubChemRESTTool uses it instead of
# PUBCHEM_BASE_URL when the tool config sets ``fields.use_pugview`` to a
# truthy value.
PUBCHEM_PUGVIEW_URL = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view"


@register_tool("PubChemRESTTool")
class PubChemRESTTool(BaseTool):
    """
    Generic PubChem PUG-REST tool class.

    Builds the request URL by filling the ``{placeholder}`` fields of the
    ``fields.endpoint`` template from the tool config, then sends a GET
    request to PubChem PUG-REST (or PUG-View when ``fields.use_pugview`` is
    set) and returns the response in a form that matches the output format.
    """

    # Matches a single ``{name}`` placeholder inside an endpoint template.
    _PLACEHOLDER_RE = re.compile(r"\{([^{}]+)\}")

    # Output formats whose response body is returned as raw bytes.
    _BINARY_FORMATS = frozenset({"PNG", "SVG"})

    def __init__(self, tool_config):
        """
        Read the endpoint template and related settings from ``tool_config``.

        Args:
            tool_config: Tool configuration dict. Must contain
                ``fields.endpoint`` and ``parameter.properties``; the other
                ``fields`` entries read here are optional.
        """
        super().__init__(tool_config)
        fields = tool_config["fields"]

        # Endpoint template, e.g. "/compound/cid/{cid}/property/{property_list}/JSON"
        self.endpoint_template = fields["endpoint"]
        # input_description and output_description might not be used, but are
        # kept for LLM reference
        self.input_description = fields.get("input_description", "")
        self.output_description = fields.get("output_description", "")
        # If property_list exists, it replaces the {property_list} placeholder
        self.property_list = fields.get("property_list", None)
        # Parameter schema (each property may carry its own "required" flag)
        self.param_schema = tool_config["parameter"]["properties"]
        self.use_pugview = fields.get("use_pugview", False)
        self.output_format = fields.get("return_format", None)

    @staticmethod
    def _format_value(value) -> str:
        """Render one argument value as a URL-path-safe string."""
        if isinstance(value, (list, tuple)):
            text = ",".join(map(str, value))
        else:
            text = str(value)
        # Escape characters such as '#', '?' and spaces that would otherwise
        # truncate or restructure the URL. '/' and ',' stay literal because
        # they are structural in PUG-REST paths, and '%' stays literal so
        # values the caller already percent-encoded are not double-encoded.
        return quote(text, safe="/,%")

    @staticmethod
    def _append_query(url: str, key: str, value) -> str:
        """Append ``key=value`` to ``url`` using '?' or '&' as appropriate."""
        separator = "&" if "?" in url else "?"
        return f"{url}{separator}{key}={value}"

    @staticmethod
    def _normalize_threshold(value) -> int:
        """
        Convert a similarity threshold to the integer percentage sent as
        the ``Threshold`` query parameter.

        Values in [0, 1] are treated as fractions and scaled by 100; other
        values are taken as percentages already. The result is rounded
        (not truncated) so that e.g. 0.95 becomes 95 rather than 94.

        Raises:
            ValueError: If ``value`` is not a finite number.
        """
        try:
            threshold = float(value)
            if 0 <= threshold <= 1:
                threshold *= 100
            return round(threshold)
        except (TypeError, ValueError, OverflowError) as exc:
            raise ValueError(f"Invalid threshold value: {value!r}") from exc

    def _build_url(self, arguments: dict) -> str:
        """
        Fill every ``{placeholder}`` in ``endpoint_template`` and return the
        complete request URL.

        For example endpoint_template="/compound/cid/{cid}/property/{property_list}/JSON"
        with arguments={"cid": 2244} and property_list=["MolecularWeight", "IUPACName"]
        gives "/compound/cid/2244/property/MolecularWeight,IUPACName/JSON",
        which is appended to the PUG-REST (or PUG-View) base URL.

        Args:
            arguments: Mapping of placeholder name to value. List values are
                joined with commas. ``xref_types`` fills the ``{xref_list}``
                placeholder, and ``threshold`` adds a ``Threshold`` query
                parameter.

        Returns:
            The full URL as a string.

        Raises:
            ValueError: If a placeholder has no matching argument, or the
                threshold is not a finite number.
        """
        url_path = self.endpoint_template

        # First replace property_list (if configured and present in the template)
        if self.property_list and "{property_list}" in url_path:
            url_path = url_path.replace(
                "{property_list}", ",".join(self.property_list)
            )

        # ``xref_types`` is the caller-facing name for the {xref_list}
        # placeholder. Register the alias before substitution so the
        # placeholder is not reported as missing; an explicit ``xref_list``
        # argument still takes precedence.
        values = dict(arguments)
        if "xref_types" in arguments:
            values.setdefault("xref_list", arguments["xref_types"])

        def _fill(match):
            name = match.group(1)
            if name not in values:
                raise ValueError(
                    f"Missing required parameter '{name}' to replace placeholder in URL."
                )
            return self._format_value(values[name])

        # Single pass: substituted values are never re-scanned for braces.
        url_path = self._PLACEHOLDER_RE.sub(_fill, url_path)

        base_url = PUBCHEM_PUGVIEW_URL if self.use_pugview else PUBCHEM_BASE_URL
        full_url = base_url + url_path

        if "threshold" in arguments:
            full_url = self._append_query(
                full_url,
                "Threshold",
                self._normalize_threshold(arguments["threshold"]),
            )

        return full_url

    def _resolve_output_format(self) -> str:
        """
        Return the upper-cased output format.

        Uses ``fields.return_format`` when configured; otherwise the last
        path segment of the endpoint template ("JSON", "PNG", "XML", ...),
        ignoring any query string the template carries.
        """
        if self.output_format:
            return str(self.output_format).upper()
        path_only = self.endpoint_template.split("?", 1)[0]
        return path_only.strip("/").split("/")[-1].upper()

    @staticmethod
    def _extract_error_detail(resp) -> str:
        """Return the PubChem ``Fault.Message`` if present, else the raw body."""
        try:
            payload = resp.json()
        except ValueError:
            # Body is not JSON; fall back to the raw text.
            return resp.text
        fault = payload.get("Fault") if isinstance(payload, dict) else None
        if isinstance(fault, dict):
            return fault.get("Message", resp.text)
        return resp.text

    def run(self, arguments: dict):
        """
        Validate ``arguments``, call PubChem and return the response.

        Args:
            arguments: Mapping of parameter name to value.

        Returns:
            Parsed JSON for JSON endpoints, ``bytes`` for PNG/SVG, text for
            every other format, or a dict with an ``"error"`` key when
            validation, the request, or JSON parsing fails.
        """
        arguments = arguments or {}

        # 1. Validate required parameters
        for key, prop in self.param_schema.items():
            if prop.get("required", False) and key not in arguments:
                return {"error": f"Parameter '{key}' is required."}

        # 2. Build URL
        try:
            url = self._build_url(arguments)
        except ValueError as e:
            return {"error": str(e)}

        # Limit results for the fast substructure/similarity searches, unless
        # the URL already specifies MaxRecords.
        if (
            "fastsubstructure" in url or "fastsimilarity" in url
        ) and "MaxRecords=" not in url:
            url = self._append_query(url, "MaxRecords", 10)

        # 3. Send HTTP GET request (30 second timeout)
        try:
            resp = requests.get(url, timeout=30)
        except requests.Timeout:
            return {
                "error": "Request to PubChem PUG-REST timed out, try reducing query scope or retry later."
            }
        except Exception as e:
            # Tool boundary: report any transport failure as an error result
            # instead of raising into the caller.
            return {"error": f"Failed to request PubChem PUG-REST: {str(e)}"}

        # 4. Check HTTP status code
        if resp.status_code != 200:
            return {
                "error": f"PubChem API returned HTTP {resp.status_code}",
                "detail": self._extract_error_detail(resp),
            }

        # 5. Determine return type based on the output format
        out_fmt = self._resolve_output_format()

        if out_fmt == "JSON":
            try:
                return resp.json()
            except ValueError:
                return {
                    "error": "Response content cannot be parsed as JSON.",
                    "content": resp.text,
                }
        if out_fmt in self._BINARY_FORMATS:
            # Return binary image
            return resp.content
        # XML, TXT, CSV, SDF and anything unrecognized are returned as text
        return resp.text