Source code for tooluniverse.pharos_tool
# pharos_tool.py
"""
Pharos/TCRD (Target Central Resource Database) API tool for ToolUniverse.
Pharos is the NIH Illuminating the Druggable Genome (IDG) portal providing
comprehensive information about understudied proteins and drug targets.
Key features:
- Target Development Level (TDL): Tclin, Tchem, Tbio, Tdark classification
- Druggability assessments for the human proteome
- Integration of 80+ data sources
API Documentation: https://pharos.nih.gov/api
GraphQL Endpoint: https://pharos-api.ncats.io/graphql
"""
import requests
from typing import Dict, Any, List, Optional
from .base_tool import BaseTool
from .tool_registry import register_tool
# Base URL for Pharos GraphQL API.
# PharosTool._execute_graphql POSTs every query to this single endpoint.
PHAROS_GRAPHQL_URL = "https://pharos-api.ncats.io/graphql"
[docs]
@register_tool("PharosTool")
class PharosTool(BaseTool):
"""
Tool for querying Pharos/TCRD GraphQL API.
Pharos provides drug target information including:
- Target Development Level (Tdark, Tbio, Tchem, Tclin)
- Druggability assessments
- Protein family classifications
- Disease associations
- Ligand/drug information
No authentication required. Free for academic/research use.
"""
[docs]
def __init__(self, tool_config: Dict[str, Any]):
    """
    Set up the tool from its configuration dictionary.

    Two optional settings are read from ``tool_config``:

    - ``timeout``: request timeout in seconds, stored on ``self.timeout``
      (defaults to 60, a longer timeout chosen for Pharos).
    - ``fields.operation``: name of the operation that ``run`` dispatches
      to, stored on ``self.operation`` (defaults to ``"get_target"``).
    """
    super().__init__(tool_config)

    # Longer timeout for Pharos
    configured_timeout = tool_config.get("timeout", 60)
    self.timeout = configured_timeout

    field_settings = tool_config.get("fields", {})
    self.operation = field_settings.get("operation", "get_target")
[docs]
def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Execute the Pharos API call."""
operation = self.operation
if operation == "get_target":
return self._get_target(arguments)
elif operation == "search_targets":
return self._search_targets(arguments)
elif operation == "get_tdl_summary":
return self._get_tdl_summary(arguments)
elif operation == "get_disease_targets":
return self._get_disease_targets(arguments)
else:
return {"status": "error", "error": f"Unknown operation: {operation}"}
[docs]
def _execute_graphql(
    self, query: str, variables: Optional[Dict] = None
) -> Dict[str, Any]:
    """
    Execute a GraphQL query against the Pharos API.

    Args:
        query: GraphQL document to send.
        variables: Optional variable values; left out of the request body
            when empty or None.

    Returns:
        ``{"status": "success", "data": ...}`` on success, where ``data``
        is never None (a null payload is replaced by an empty dict so
        callers can safely call ``.get`` on it), or
        ``{"status": "error", "error": message}`` on failure. GraphQL-level
        failures additionally carry the server's raw ``errors`` value.
        Transport and decoding problems are reported as error dicts rather
        than raised.
    """
    payload: Dict[str, Any] = {"query": query}
    if variables:
        payload["variables"] = variables

    # Only the network round-trip and JSON decoding are guarded; the
    # response interpretation below is written so that it cannot raise.
    try:
        response = requests.post(
            PHAROS_GRAPHQL_URL,
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.Timeout:
        return {
            "status": "error",
            "error": f"Pharos API timeout after {self.timeout}s",
        }
    except requests.exceptions.RequestException as e:
        return {"status": "error", "error": f"Pharos API request failed: {str(e)}"}
    except Exception as e:
        return {"status": "error", "error": f"Unexpected error: {str(e)}"}

    if not isinstance(result, dict):
        return {
            "status": "error",
            "error": "Unexpected error: Pharos API returned a non-object JSON response",
        }

    # A null or empty "errors" entry is not a failure; a non-empty one is
    # reported using the first entry's message when it has one.
    errors = result.get("errors")
    if errors:
        first_error = errors[0] if isinstance(errors, list) else errors
        if isinstance(first_error, dict):
            message = first_error.get("message", "GraphQL error")
        else:
            message = str(first_error)
        return {"status": "error", "error": message, "errors": errors}

    # `"data": null` would otherwise surface as None and break callers
    # that immediately do result["data"].get(...).
    return {"status": "success", "data": result.get("data") or {}}
[docs]
def _get_target(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Get detailed target information by gene symbol or UniProt ID.
Returns TDL classification, protein family, disease associations,
ligands, and druggability information.
"""
gene = arguments.get("gene")
uniprot = arguments.get("uniprot")
if not gene and not uniprot:
return {
"status": "error",
"error": "Either 'gene' or 'uniprot' parameter is required",
}
# Use the target query with q parameter (ITarget input type)
# Simplified query for reliability
if uniprot:
query = """
query GetTarget($q: ITarget!) {
target(q: $q) {
name
sym
uniprot
tdl
fam
novelty
description
publicationCount
}
}
"""
variables = {"q": {"uniprot": uniprot}}
else:
query = """
query GetTarget($q: ITarget!) {
target(q: $q) {
name
sym
uniprot
tdl
fam
novelty
description
publicationCount
}
}
"""
variables = {"q": {"sym": gene}}
result = self._execute_graphql(query, variables)
if result["status"] == "success":
target = result["data"].get("target")
if not target:
return {
"status": "success",
"data": None,
"message": f"No target found for {'UniProt ' + uniprot if uniprot else 'gene ' + gene}",
}
result["data"] = target
return result
[docs]
def _search_targets(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Search targets by query string.
Returns targets matching the search term with TDL classification.
"""
query_term = arguments.get("query")
top = arguments.get("top", 10)
if not query_term:
return {"status": "error", "error": "query parameter is required"}
# Simple term-based search
query = """
query SearchTargets($term: String!, $top: Int!) {
targets(filter: {term: $term}, top: $top) {
count
targets {
name
sym
uniprot
tdl
fam
novelty
description
}
}
}
"""
variables = {
"term": query_term,
"top": min(top, 100), # Cap at 100
}
result = self._execute_graphql(query, variables)
if result["status"] == "success":
targets_data = result["data"].get("targets", {})
result["data"] = {
"count": targets_data.get("count", 0),
"targets": targets_data.get("targets", []),
}
return result
[docs]
def _get_tdl_summary(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Get Target Development Level summary statistics.
Returns counts of targets at each TDL level:
- Tclin: Targets with approved drugs
- Tchem: Targets with small molecule activities
- Tbio: Targets with biological annotations
- Tdark: Understudied targets with minimal information
"""
# Return a static description since aggregation queries are slow
# We can query individual TDL counts if needed
query = """
query {
dbVersion
}
"""
result = self._execute_graphql(query)
if result["status"] == "success":
result["data"] = {
"tdl_levels": ["Tclin", "Tchem", "Tbio", "Tdark"],
"description": {
"Tclin": "Targets with approved drugs",
"Tchem": "Targets with small molecule activities (IC50 < 30nM)",
"Tbio": "Targets with GO annotations, OMIM phenotypes, or publications",
"Tdark": "Understudied targets with minimal information",
},
"db_version": result["data"].get("dbVersion"),
"note": "For target counts by TDL, use search_targets with specific TDL filter or visit https://pharos.nih.gov",
}
return result
[docs]
def _get_disease_targets(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""
Get targets associated with a disease.
Returns targets with TDL classification for drug discovery prioritization.
"""
disease = arguments.get("disease")
top = arguments.get("top", 20)
if not disease:
return {"status": "error", "error": "disease parameter is required"}
# Use associatedDisease filter
query = """
query GetDiseaseTargets($disease: String!, $top: Int!) {
targets(filter: {associatedDisease: $disease}, top: $top) {
count
targets {
name
sym
uniprot
tdl
fam
novelty
}
}
}
"""
variables = {"disease": disease, "top": min(top, 100)}
result = self._execute_graphql(query, variables)
if result["status"] == "success":
targets_data = result["data"].get("targets", {})
result["data"] = {
"disease": disease,
"count": targets_data.get("count", 0),
"targets": targets_data.get("targets", []),
}
return result