Skip to main content

SDKs Overview

OpenSearch AgentHealth provides official SDKs for instrumenting your AI agents with observability and evaluation capabilities.

Full-featured SDK for Python applications. Supports all major agent frameworks.

Python SDK Documentation →

Native support for Node.js and browser environments.

JavaScript SDK Documentation →

pip install opensearch-agentops
from opensearch_agentops import AgentHealth

# Initialize the client once at startup; all traced calls export spans
# to the configured OTEL Collector endpoint.
agentops = AgentHealth(
    endpoint="http://localhost:4317",  # OTEL Collector
    service_name="my-agent"
)

# Instrument your agent: the decorator wraps each call in a span.
@agentops.trace
def run_agent(prompt: str):
    # Your agent logic here
    response = my_llm.generate(prompt)
    return response

# Automatic tracing captures:
# - LLM calls with token usage
# - Tool invocations
# - Execution timing
Terminal window
npm install @opensearch-project/agentops
import { AgentHealth } from '@opensearch-project/agentops';

// Initialize the client; spans are exported to the OTEL Collector endpoint.
const agentops = new AgentHealth({
  endpoint: 'http://localhost:4317',
  serviceName: 'my-agent'
});

// Instrument your agent: trace() opens a span named 'agent_run'
// around the async callback and passes the span in for enrichment.
const result = await agentops.trace('agent_run', async (span) => {
  // Your agent logic here
  const response = await myLLM.generate(prompt);
  // Add custom attributes (OTEL GenAI semantic convention keys)
  span.setAttribute('gen_ai.request.model', 'claude-sonnet-4');
  return response;
});

The SDKs automatically capture:

| Data | Description | OTEL Attribute |
|------|-------------|----------------|
| Model Info | LLM provider and model ID | `gen_ai.system`, `gen_ai.request.model` |
| Token Usage | Input/output token counts | `gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens` |
| Tool Calls | Function invocations | `gen_ai.tool.name` |
| Latency | Execution timing | Span duration |
| Errors | Exceptions and failures | `exception.*` |

SDKs support multiple agent frameworks through adapters:

# Framework adapters wrap a specific agent runtime behind a common
# interface so the SDK can drive and trace it uniformly.
from opensearch_agentops.adapters import (
    LangGraphAdapter,
    StrandsAdapter,
    ClaudeCodeAdapter,
    CustomAdapter
)

# LangGraph integration
adapter = LangGraphAdapter(
    endpoint="http://localhost:3000",  # presumably the LangGraph server URL — confirm
    streaming=True
)

# Strands integration (AWS Bedrock)
adapter = StrandsAdapter(
    agent_id="my-strands-agent",
    region="us-west-2"  # AWS region the Strands agent runs in
)

# Custom agent: wrap any callable
adapter = CustomAdapter(
    execute_fn=my_agent_function
)

Compare different agent configurations:

# Define agent configurations: each dict pairs a framework with a model,
# and every config is run against the same test case.
configs = [
    {"agent": "langgraph", "model": "claude-sonnet-4"},
    {"agent": "langgraph", "model": "gpt-4o"},
    {"agent": "strands", "model": "claude-sonnet-4"},
]

# Run comparison
comparison = agentops.compare_agents(
    test_case_id="tc-database-timeout",
    configs=configs
)

# Analyze results
print(f"Best performing: {comparison.best_config}")
# Plain string: no placeholders, so no f-prefix needed (ruff F541)
print("Accuracy comparison:")
for result in comparison.results:
    print(f" {result.config}: {result.accuracy}/100")

Run evaluations programmatically:

# Create a test case: the prompt and context the agent sees, plus the
# expected outcomes the LLM judge scores the response against.
test_case = agentops.create_test_case(
    name="Database Timeout Diagnosis",
    initial_prompt="Why is my API returning 503 errors?",
    context=[
        {"type": "log", "content": "Connection timeout errors..."},
        {"type": "metric", "content": {"pool_size": [45, 48, 50, 50]}}
    ],
    expected_outcomes=[
        "Identify connection pool exhaustion",
        "Recommend increasing pool size"
    ]
)

# Create a benchmark grouping one or more test cases
benchmark = agentops.create_benchmark(
    name="RCA Suite",
    test_case_ids=[test_case.id]
)

# Run evaluation with a specific agent/model pairing
run = agentops.run_benchmark(
    benchmark_id=benchmark.id,
    agent="langgraph",
    model="claude-sonnet-4"
)

# Get aggregate results
print(f"Pass Rate: {run.metrics.pass_rate}%")
print(f"Avg Accuracy: {run.metrics.avg_accuracy}")

# Access individual per-test-case results
for result in run.results:
    print(f"{result.test_case_name}: {result.pass_fail_status}")
    print(f" Accuracy: {result.accuracy}")
    print(f" Reasoning: {result.llm_judge_reasoning}")
# --- LangGraph --------------------------------------------------------
from opensearch_agentops.integrations import LangGraphInstrumentation

# Auto-instrument LangGraph
LangGraphInstrumentation.instrument()

# Your LangGraph code is now traced automatically
from langgraph.prebuilt import create_react_agent

agent = create_react_agent(model, tools)
result = agent.invoke({"messages": [("user", prompt)]})

# --- Strands ----------------------------------------------------------
from opensearch_agentops.integrations import StrandsInstrumentation

# Auto-instrument Strands
StrandsInstrumentation.instrument()

# Strands agents are now traced
from strands import Agent

agent = Agent(model="claude-sonnet-4")
result = agent.run(prompt)

# --- OpenAI -----------------------------------------------------------
from opensearch_agentops.integrations import OpenAIInstrumentation

# Auto-instrument OpenAI
OpenAIInstrumentation.instrument()

# OpenAI calls are now traced
from openai import OpenAI

client = OpenAI()
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": prompt}]
)

# --- Anthropic --------------------------------------------------------
from opensearch_agentops.integrations import AnthropicInstrumentation

# Auto-instrument Anthropic
AnthropicInstrumentation.instrument()

# Anthropic calls are now traced
import anthropic

client = anthropic.Anthropic()
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    messages=[{"role": "user", "content": prompt}]
)
Terminal window
# Required
# OTLP endpoint the SDK exports telemetry to (the OTEL Collector)
AGENTOPS_ENDPOINT=http://localhost:4317

# Optional
AGENTOPS_SERVICE_NAME=my-agent
AGENTOPS_ENVIRONMENT=production
AGENTOPS_DEBUG=false

# LLM Judge
AGENTOPS_JUDGE_MODEL=claude-sonnet-4
AGENTOPS_JUDGE_PROVIDER=bedrock # or openai, anthropic

# OpenSearch Storage
# NOTE: admin/admin are the local-dev defaults — change for production
OPENSEARCH_URL=http://localhost:9200
OPENSEARCH_USERNAME=admin
OPENSEARCH_PASSWORD=admin
from opensearch_agentops import AgentHealth, Config

# Programmatic configuration: takes precedence over environment variables
# (TODO confirm precedence against SDK reference).
config = Config(
    endpoint="http://localhost:4317",
    service_name="my-agent",
    environment="production",
    # Sampling
    trace_sample_rate=1.0,  # 100% sampling
    # Batching
    batch_size=100,         # spans per export batch
    flush_interval=5000,    # 5 seconds (value is in milliseconds)
    # Judge settings
    judge_model="claude-sonnet-4",
    judge_provider="bedrock"
)
agentops = AgentHealth(config=config)
┌─────────────────────────────────────────────────────┐
│                  Your Application                   │
├─────────────────────────────────────────────────────┤
│                   AgentHealth SDK                   │
│  ┌────────────┐  ┌────────────┐  ┌────────────┐     │
│  │  Tracers   │  │  Adapters  │  │ Evaluators │     │
│  └─────┬──────┘  └─────┬──────┘  └─────┬──────┘     │
│        │               │               │            │
│        └───────────────┴───────────────┘            │
│                        │                            │
│              ┌─────────┴─────────┐                  │
│              │   OTEL Exporter   │                  │
│              └─────────┬─────────┘                  │
└────────────────────────┼────────────────────────────┘
                         │ OTLP gRPC/HTTP
                         ▼
              ┌──────────────────────┐
              │    OTEL Collector    │
              └──────────┬───────────┘
                         │
           ┌─────────────┼─────────────┐
           ▼             ▼             ▼
     ┌──────────┐  ┌──────────┐  ┌──────────┐
     │OpenSearch│  │Prometheus│  │  Jaeger  │
     └──────────┘  └──────────┘  └──────────┘

Complete Python SDK reference →

Complete JavaScript SDK reference →

Common issues and solutions →