Create an evaluation monitor that spawns eval runs on a recurring schedule.
Monitors persist a reusable evaluation configuration (target type, metrics,
filters, cadence), and the system automatically creates eval runs at each
scheduled interval. If only_if_changed is true (the default), runs are
skipped when no new data has arrived since the previous run.

Example request:

```shell
curl --request POST \
  --url https://api.pandaprobe.com/evaluations/monitors \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{
    "name": "Daily production trace eval",
    "target_type": "TRACE",
    "metrics": ["task_completion", "tool_correctness"],
    "cadence": "daily"
  }'
```

Example response:

```json
{
  "id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
  "project_id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
  "name": "<string>",
  "target_type": "<string>",
  "metric_names": ["<string>"],
  "filters": {},
  "sampling_rate": 123,
  "model": "<string>",
  "cadence": "<string>",
  "only_if_changed": true,
  "status": "<string>",
  "last_run_at": "<string>",
  "last_run_id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
  "next_run_at": "<string>",
  "created_at": "<string>",
  "updated_at": "<string>"
}
```
Request body fields:
- name — human-readable monitor name, e.g. "Daily prod eval".
- target_type — "TRACE" or "SESSION".
- metrics — see GET /evaluations/trace-metrics (TRACE) or GET /evaluations/session-metrics (SESSION) for available names.
- filters — pass {} to match everything. Accepted keys depend on target_type. TRACE: date_from, date_to (ISO 8601), status (PENDING/RUNNING/COMPLETED/ERROR), session_id, user_id, tags (string[]), name (substring match). SESSION: date_from, date_to, user_id, has_error (bool), tags (string[]), min_trace_count (int).
- cadence — "every_6h", "daily", "weekly". Custom cron: "cron:<min hour dom month dow>", e.g. "cron:0 3 * * *" (daily 3 AM UTC), "cron:0 6 * * 1-5" (weekdays 6 AM).
- model — e.g. "openai/gpt-4o".
- signal weights (SESSION only) — keys: confidence, loop_detection, tool_correctness, coherence.

Auth: Bearer <token> + X-Project-ID, or X-API-Key + X-Project-Name.
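Because the accepted filter keys differ by target_type, a client can sanity-check a request body before sending it. The helper below is a hypothetical client-side sketch; the key sets are taken from the field list above, but the function itself is not part of any SDK:

```python
ALLOWED_FILTER_KEYS = {
    "TRACE": {"date_from", "date_to", "status", "session_id",
              "user_id", "tags", "name"},
    "SESSION": {"date_from", "date_to", "user_id", "has_error",
                "tags", "min_trace_count"},
}

def validate_monitor_body(body: dict) -> list[str]:
    """Return a list of client-side validation problems (empty if OK)."""
    problems = []
    target = body.get("target_type")
    if target not in ALLOWED_FILTER_KEYS:
        problems.append("target_type must be 'TRACE' or 'SESSION'")
        return problems
    unknown = set(body.get("filters", {})) - ALLOWED_FILTER_KEYS[target]
    if unknown:
        problems.append(f"filters not valid for {target}: {sorted(unknown)}")
    return problems

body = {"name": "Daily prod eval", "target_type": "TRACE",
        "metrics": ["task_completion"], "cadence": "daily",
        "filters": {"status": "COMPLETED", "min_trace_count": 3}}
print(validate_monitor_body(body))
# min_trace_count is a SESSION-only filter, so it is flagged
```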
Bearer authentication header of the form Bearer <token>, where <token> is your auth token.
name: Human-readable name for the monitor, e.g. "Daily prod trace eval".
target_type: Evaluation scope: "TRACE" for trace-level metrics or "SESSION" for session-level metrics.
metrics: Metric names to run on each scheduled eval. For TRACE monitors use GET /evaluations/trace-metrics; for SESSION monitors use GET /evaluations/session-metrics to list available names. Example: ["task_completion", "step_efficiency"].
cadence: How often the monitor fires. Predefined intervals: "every_6h", "daily", "weekly". Custom cron: "cron:<5-part expression>" where the five parts are minute hour day-of-month month day-of-week. Examples: "cron:0 3 * * *" (daily at 3 AM UTC), "cron:0 6 * * 1-5" (weekdays at 6 AM), "cron:0 */4 * * *" (every 4 hours).
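A rough sketch of how a cadence string could map to a next firing time. The interval offsets follow the documented names; the cron branch is a toy that handles only the fixed-time daily form, and none of this claims to mirror the server's actual scheduler:

```python
from datetime import datetime, timedelta, timezone

INTERVALS = {"every_6h": timedelta(hours=6),
             "daily": timedelta(days=1),
             "weekly": timedelta(weeks=1)}

def next_run_at(cadence: str, last_run: datetime) -> datetime:
    """Illustrative mapping from a cadence string to the next firing time."""
    if cadence in INTERVALS:
        return last_run + INTERVALS[cadence]
    if cadence.startswith("cron:"):
        minute, hour, dom, month, dow = cadence[5:].split()
        # Toy handling of the fixed-time daily form "cron:M H * * *" only.
        if (dom, month, dow) == ("*", "*", "*") and minute.isdigit() and hour.isdigit():
            candidate = last_run.replace(hour=int(hour), minute=int(minute),
                                         second=0, microsecond=0)
            return candidate if candidate > last_run else candidate + timedelta(days=1)
        raise NotImplementedError("general cron evaluation needs a real cron library")
    raise ValueError(f"unknown cadence: {cadence}")

last = datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc)
print(next_run_at("every_6h", last))        # same day, 18:00 UTC
print(next_run_at("cron:0 3 * * *", last))  # next day, 03:00 UTC
```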
filters: JSON object defining which traces/sessions the monitor targets. Pass {} to match everything in the project. TRACE monitors accept: date_from (ISO 8601), date_to (ISO 8601), status (PENDING|RUNNING|COMPLETED|ERROR), session_id, user_id, tags (string array, ANY match), name (substring, case-insensitive). SESSION monitors accept: date_from, date_to, user_id, has_error (bool), tags, min_trace_count (int). Example: {"status": "COMPLETED", "tags": ["production"]}.
sampling_rate (0 <= x <= 1): Fraction of matching traces/sessions to evaluate per run. 1.0 = all, 0.1 = random 10%.
model: LLM model override for judge calls (e.g. "openai/gpt-4o"). Uses the system default if null.
only_if_changed: When true, the scheduled run is skipped if no new traces/sessions have arrived since the last run, saving LLM costs. Set to false to always run on schedule, regardless of new data.
Signal weights (SESSION only): Override default signal weights for session-level aggregation; rejected for TRACE monitors. Keys: confidence, loop_detection, tool_correctness, coherence. Defaults: confidence=1.0, loop_detection=1.0, tool_correctness=0.8, coherence=1.0. Example: {"confidence": 1.0, "loop_detection": 1.5}.
Successful response: the full evaluation monitor representation.