inspect_ai.log
Eval Log Files
list_eval_logs
List all eval logs in a directory.
def list_eval_logs(
    log_dir: str = os.environ.get("INSPECT_LOG_DIR", "./logs"),
    formats: list[Literal["eval", "json"]] | None = None,
    filter: Callable[[EvalLog], bool] | None = None,
    recursive: bool = True,
    descending: bool = True,
    fs_options: dict[str, Any] = {},
) -> list[EvalLogInfo]
log_dir
str-
Log directory (defaults to INSPECT_LOG_DIR)
formats
list[Literal['eval', 'json']] | None-
Formats to list (default to listing all formats)
filter
Callable[[EvalLog], bool] | None-
Filter to limit logs returned. Note that the EvalLog instance passed to the filter has only the EvalLog header (i.e. does not have the samples or logging output).
recursive
bool-
List log files recursively (defaults to True).
descending
bool-
List in descending order.
fs_options
dict[str, Any]-
Optional. Additional arguments to pass through to the filesystem provider (e.g.
S3FileSystem
).
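A minimal usage sketch (the ./logs directory and the status filter are illustrative assumptions, not requirements of the API):
from inspect_ai.log import list_eval_logs
# List .eval and .json logs under ./logs (recursively), newest first.
# The filter receives only the EvalLog header (no samples), so checking
# status here is cheap.
logs = list_eval_logs(
    "./logs",
    filter=lambda log: log.status == "success",
)
for info in logs:
    print(info.name, info.task, info.task_id)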
write_eval_log
Write an evaluation log.
def write_eval_log(
    log: EvalLog,
    location: str | Path | FileInfo | None = None,
    format: Literal["eval", "json", "auto"] = "auto",
) -> None
log
EvalLog-
Evaluation log to write.
location
str | Path | FileInfo | None-
Location to write log to.
format
Literal['eval', 'json', 'auto']-
Write to format (defaults to ‘auto’ based on
log_file
extension)
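A brief sketch using hypothetical file paths; the write format is inferred from the destination extension:
from inspect_ai.log import read_eval_log, write_eval_log
# Read an existing log and re-write it as JSON (format inferred from
# the .json extension). Paths are illustrative only.
log = read_eval_log("logs/run.eval")
write_eval_log(log, "converted/run.json")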
write_eval_log_async
Write an evaluation log.
async def write_eval_log_async(
    log: EvalLog,
    location: str | Path | FileInfo | None = None,
    format: Literal["eval", "json", "auto"] = "auto",
) -> None
log
EvalLog-
Evaluation log to write.
location
str | Path | FileInfo | None-
Location to write log to.
format
Literal['eval', 'json', 'auto']-
Write to format (defaults to ‘auto’ based on
log_file
extension)
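A sketch of the async variant inside an event loop (file names are hypothetical):
import asyncio
from inspect_ai.log import read_eval_log_async, write_eval_log_async
async def copy_log(src: str, dest: str) -> None:
    # Read and re-write a log without blocking the event loop.
    log = await read_eval_log_async(src)
    await write_eval_log_async(log, dest)
asyncio.run(copy_log("logs/run.eval", "backup/run.eval"))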
read_eval_log
Read an evaluation log.
def read_eval_log(
    log_file: str | Path | EvalLogInfo,
    header_only: bool = False,
    resolve_attachments: bool = False,
    format: Literal["eval", "json", "auto"] = "auto",
) -> EvalLog
log_file
str | Path | EvalLogInfo-
Log file to read.
header_only
bool-
Read only the header (i.e. exclude the “samples” and “logging” fields). Defaults to False.
resolve_attachments
bool-
Resolve attachments (e.g. images) to their full content.
format
Literal['eval', 'json', 'auto']-
Read from format (defaults to ‘auto’ based on
log_file
extension)
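A header-only read sketch (the log path is illustrative); skipping the samples keeps this fast even for large logs:
from inspect_ai.log import read_eval_log
log = read_eval_log("logs/run.eval", header_only=True)
print(log.status)       # "success", "error", etc.
print(log.eval.model)   # model used for the eval
if log.results is not None:
    for score in log.results.scores:
        print(score.name, {k: m.value for k, m in score.metrics.items()})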
read_eval_log_async
Read an evaluation log.
async def read_eval_log_async(
    log_file: str | Path | EvalLogInfo,
    header_only: bool = False,
    resolve_attachments: bool = False,
    format: Literal["eval", "json", "auto"] = "auto",
) -> EvalLog
log_file
str | Path | EvalLogInfo-
Log file to read.
header_only
bool-
Read only the header (i.e. exclude the “samples” and “logging” fields). Defaults to False.
resolve_attachments
bool-
Resolve attachments (e.g. images) to their full content.
format
Literal['eval', 'json', 'auto']-
Read from format (defaults to ‘auto’ based on
log_file
extension)
read_eval_log_sample
Read a sample from an evaluation log.
def read_eval_log_sample(
    log_file: str | Path | EvalLogInfo,
    id: int | str,
    epoch: int = 1,
    resolve_attachments: bool = False,
    format: Literal["eval", "json", "auto"] = "auto",
) -> EvalSample
log_file
str | Path | EvalLogInfo-
Log file to read.
id
int | str-
Sample id to read.
epoch
int-
Epoch for sample id (defaults to 1)
resolve_attachments
bool-
Resolve attachments (e.g. images) to their full content.
format
Literal['eval', 'json', 'auto']-
Read from format (defaults to ‘auto’ based on
log_file
extension)
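A sketch reading one sample (the log path and sample id are illustrative):
from inspect_ai.log import read_eval_log_sample
# Read sample id 1 from epoch 1, inlining attachment content (e.g. images).
sample = read_eval_log_sample("logs/run.eval", id=1, resolve_attachments=True)
print(sample.id, sample.epoch, sample.scores)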
read_eval_log_samples
Read all samples from an evaluation log incrementally.
Generator for samples in a log file. Only one sample at a time will be read into memory and yielded to the caller.
def read_eval_log_samples(
    log_file: str | Path | EvalLogInfo,
    all_samples_required: bool = True,
    resolve_attachments: bool = False,
    format: Literal["eval", "json", "auto"] = "auto",
) -> Generator[EvalSample, None, None]
log_file
str | Path | EvalLogInfo-
Log file to read.
all_samples_required
bool-
All samples must be included in the file or an IndexError is thrown.
resolve_attachments
bool-
Resolve attachments (e.g. images) to their full content.
format
Literal['eval', 'json', 'auto']-
Read from format (defaults to ‘auto’ based on
log_file
extension)
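An iteration sketch (path illustrative); passing all_samples_required=False allows reading logs from runs that did not complete:
from inspect_ai.log import read_eval_log_samples
# Samples are yielded one at a time, so memory use stays bounded.
for sample in read_eval_log_samples("logs/run.eval", all_samples_required=False):
    status = "error" if sample.error is not None else "ok"
    print(sample.id, sample.epoch, status)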
read_eval_log_sample_summaries
Read sample summaries from an eval log.
def read_eval_log_sample_summaries(
    log_file: str | Path | EvalLogInfo,
    format: Literal["eval", "json", "auto"] = "auto",
) -> list[EvalSampleSummary]
log_file
str | Path | EvalLogInfo-
Log file to read.
format
Literal['eval', 'json', 'auto']-
Read from format (defaults to ‘auto’ based on
log_file
extension)
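A sketch computing a simple completion count from summaries (path illustrative); summaries exclude messages and events, so this stays fast:
from inspect_ai.log import read_eval_log_sample_summaries
summaries = read_eval_log_sample_summaries("logs/run.eval")
completed = [s for s in summaries if s.completed]
print(f"{len(completed)}/{len(summaries)} samples completed")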
convert_eval_logs
Convert between log file formats.
Convert log file(s) to a target format. If a file is already in the target format it will just be copied to the output dir.
def convert_eval_logs(
    path: str, to: Literal["eval", "json"], output_dir: str, overwrite: bool = False
) -> None
path
str-
Path to source log file(s). Should be either a single log file or a directory containing log files.
to
Literal['eval', 'json']-
Format to convert to. If a file is already in the target format it will just be copied to the output dir.
output_dir
str-
Output directory to write converted log file(s) to.
overwrite
bool-
Overwrite existing log files (defaults to
False
, raising an error if the output file path already exists).
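A one-line conversion sketch (directory names are illustrative):
from inspect_ai.log import convert_eval_logs
# Convert every log under ./logs to the binary .eval format.
convert_eval_logs("./logs", to="eval", output_dir="./logs-eval", overwrite=True)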
bundle_log_dir
Bundle a log_dir into a statically deployable viewer
def bundle_log_dir(
    log_dir: str | None = None,
    output_dir: str | None = None,
    overwrite: bool = False,
    fs_options: dict[str, Any] = {},
) -> None
log_dir
str | None-
The log_dir to bundle.
output_dir
str | None-
The directory to place bundled output. If no directory is specified, the env variable
INSPECT_VIEW_BUNDLE_OUTPUT_DIR
will be used.
overwrite
bool-
Optional. Whether to overwrite files in the output directory. Defaults to False.
fs_options
dict[str, Any]-
Optional. Additional arguments to pass through to the filesystem provider (e.g.
S3FileSystem
).
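A sketch (paths illustrative); the bundled output can then be served from any static web host:
from inspect_ai.log import bundle_log_dir
bundle_log_dir(log_dir="./logs", output_dir="./logs-www", overwrite=True)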
write_log_dir_manifest
Write a manifest for a log directory.
A log directory manifest is a dictionary of EvalLog headers (EvalLog w/o samples) keyed by log file names (names are relative to the log directory)
def write_log_dir_manifest(
    log_dir: str,
    *,
    filename: str = "logs.json",
    output_dir: str | None = None,
    fs_options: dict[str, Any] = {},
) -> None
log_dir
str-
Log directory to write manifest for.
filename
str-
Manifest filename (defaults to “logs.json”)
output_dir
str | None-
Output directory for manifest (defaults to log_dir)
fs_options
dict[str, Any]-
Optional. Additional arguments to pass through to the filesystem provider (e.g.
S3FileSystem
).
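A minimal sketch (directory name illustrative) that writes logs.json into the log directory itself:
from inspect_ai.log import write_log_dir_manifest
write_log_dir_manifest("./logs")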
retryable_eval_logs
Extract the list of retryable logs from a list of logs.
Retryable logs are logs with status “error” or “cancelled” that do not have a corresponding log with status “success” (indicating they were subsequently retried and completed)
def retryable_eval_logs(logs: list[EvalLogInfo]) -> list[EvalLogInfo]
logs
list[EvalLogInfo]-
List of logs to examine.
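A sketch combining this with list_eval_logs (the ./logs directory is an assumption for illustration):
from inspect_ai.log import list_eval_logs, retryable_eval_logs
logs = list_eval_logs("./logs")
for info in retryable_eval_logs(logs):
    print("needs retry:", info.name)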
EvalLogInfo
File info and task identifiers for eval log.
class EvalLogInfo(BaseModel)
Attributes
name
str-
Name of file.
type
str-
Type of file (file or directory)
size
int-
File size in bytes.
mtime
float | None-
File modification time (None if the file is a directory on S3).
task
str-
Task name.
task_id
str-
Task id.
suffix
str | None-
Log file suffix (e.g. “-scored”)
Eval Log API
EvalLog
Evaluation log.
class EvalLog(BaseModel)
Attributes
version
int-
Eval log file format version.
status
Literal['started', 'success', 'cancelled', 'error']-
Status of evaluation (did it succeed or fail).
eval
EvalSpec-
Eval identity and configuration.
plan
EvalPlan-
Eval plan (solvers and config)
results
EvalResults | None-
Eval results (scores and metrics).
stats
EvalStats-
Eval stats (runtime, model usage)
error
EvalError | None-
Error that halted eval (if status==“error”)
samples
list[EvalSample] | None-
Samples processed by eval.
reductions
list[EvalSampleReductions] | None-
Reduced sample values
location
str-
Location that the log file was read from.
EvalSpec
Eval target and configuration.
class EvalSpec(BaseModel)
Attributes
eval_id
str-
Globally unique id for eval.
run_id
str-
Unique run id
created
str-
Time created.
task
str-
Task name.
task_id
str-
Unique task id.
task_version
int-
Task version.
task_file
str | None-
Task source file.
task_registry_name
str | None-
Task registry name.
task_attribs
dict[str, Any]-
Attributes of the @task decorator.
task_args
dict[str, Any]-
Arguments used for invoking the task.
solver
str | None-
Solver name.
solver_args
dict[str, Any] | None-
Arguments used for invoking the solver.
tags
list[str] | None-
Tags associated with evaluation run.
dataset
EvalDataset-
Dataset used for eval.
sandbox
SandboxEnvironmentSpec | None-
Sandbox environment type and optional config file.
model
str-
Model used for eval.
model_generate_config
GenerateConfig-
Generate config specified for model instance.
model_base_url
str | None-
Optional override of model base url
model_args
dict[str, Any]-
Model specific arguments.
model_roles
dict[str, EvalModelConfig] | None-
Model roles.
config
EvalConfig-
Configuration values for eval.
revision
EvalRevision | None-
Source revision of eval.
packages
dict[str, str]-
Package versions for eval.
metadata
dict[str, Any] | None-
Additional eval metadata.
scorers
list[EvalScorer] | None-
Scorers and args for this eval
metrics
list[EvalMetricDefinition] | dict[str, list[EvalMetricDefinition]] | None-
Metrics and args for this eval.
EvalDataset
Dataset used for evaluation.
class EvalDataset(BaseModel)
Attributes
name
str | None-
Dataset name.
location
str | None-
Dataset location (file path or remote URL)
samples
int | None-
Number of samples in the dataset.
sample_ids
list[str] | list[int] | list[str | int] | None-
IDs of samples in the dataset.
shuffled
bool | None-
Was the dataset shuffled after reading.
EvalConfig
Configuration used for evaluation.
class EvalConfig(BaseModel)
Attributes
limit
int | tuple[int, int] | None-
Sample limit (number of samples or range of samples).
sample_id
str | int | list[str] | list[int] | list[str | int] | None-
Evaluate specific sample(s).
epochs
int | None-
Number of epochs to run samples over.
epochs_reducer
list[str] | None-
Reducers for aggregating per-sample scores.
approval
ApprovalPolicyConfig | None-
Approval policy for tool use.
fail_on_error
bool | float | None-
Fail eval when sample errors occur. True to fail on first sample error (default); False to never fail on sample errors; value between 0 and 1 to fail if a proportion of total samples fails; value greater than 1 to fail eval if a count of samples fails.
retry_on_error
int | None-
Number of times to retry samples if they encounter errors.
message_limit
int | None-
Maximum messages to allow per sample.
token_limit
int | None-
Maximum tokens usage per sample.
time_limit
int | None-
Maximum clock time per sample.
working_limit
int | None-
Maximum working time per sample.
max_samples
int | None-
Maximum number of samples to run in parallel.
max_tasks
int | None-
Maximum number of tasks to run in parallel.
max_subprocesses
int | None-
Maximum number of subprocesses to run concurrently.
max_sandboxes
int | None-
Maximum number of sandboxes to run concurrently.
sandbox_cleanup
bool | None-
Cleanup sandbox environments after task completes.
log_samples
bool | None-
Log detailed information on each sample.
log_realtime
bool | None-
Log events in realtime (enables live viewing of samples in inspect view).
log_images
bool | None-
Log base64 encoded versions of images.
log_buffer
int | None-
Number of samples to buffer before writing log file.
log_shared
int | None-
Interval (in seconds) for syncing sample events to log directory.
score_display
bool | None-
Display scoring metrics realtime.
EvalModelConfig
Model config.
class EvalModelConfig(BaseModel)
Attributes
model
str-
Model name.
config
GenerateConfig-
Generate config
base_url
str | None-
Model base url.
args
dict[str, Any]-
Model specific arguments.
EvalRevision
Git revision for evaluation.
class EvalRevision(BaseModel)
Attributes
type
Literal['git']-
Type of revision (currently only “git”)
origin
str-
Revision origin server
commit
str-
Revision commit.
EvalPlan
Plan (solvers) used in evaluation.
class EvalPlan(BaseModel)
Attributes
name
str-
Plan name.
steps
list[EvalPlanStep]-
Steps in plan.
finish
EvalPlanStep | None-
Step to always run at the end.
config
GenerateConfig-
Generation config.
EvalPlanStep
Solver step.
class EvalPlanStep(BaseModel)
Attributes
solver
str-
Name of solver.
params
dict[str, Any]-
Parameters used to instantiate solver.
EvalResults
Scoring results from evaluation.
class EvalResults(BaseModel)
Attributes
total_samples
int-
Total samples in eval (dataset samples * epochs)
completed_samples
int-
Samples completed without error.
Will be equal to total_samples except when --fail-on-error is enabled.
scores
list[EvalScore]-
Scorers used to compute results
metadata
dict[str, Any] | None-
Additional results metadata.
sample_reductions
list[EvalSampleReductions] | None-
List of per sample scores reduced across epochs
EvalScore
Score for evaluation task.
class EvalScore(BaseModel)
Attributes
name
str-
Score name.
scorer
str-
Scorer name.
reducer
str | None-
Reducer name.
params
dict[str, Any]-
Parameters specified when creating scorer.
metrics
dict[str, EvalMetric]-
Metrics computed for this scorer.
metadata
dict[str, Any] | None-
Additional scorer metadata.
EvalMetric
Metric for evaluation score.
class EvalMetric(BaseModel)
Attributes
name
str-
Metric name.
value
int | float-
Metric value.
params
dict[str, Any]-
Params specified when creating metric.
metadata
dict[str, Any] | None-
Additional metadata associated with metric.
EvalSampleReductions
Score reductions.
class EvalSampleReductions(BaseModel)
Attributes
scorer
str-
Name of the scorer.
reducer
str | None-
Name of the reducer.
samples
list[EvalSampleScore]-
List of reduced scores
EvalStats
Timing and usage statistics.
class EvalStats(BaseModel)
Attributes
started_at
str-
Evaluation start time.
completed_at
str-
Evaluation completion time.
model_usage
dict[str, ModelUsage]-
Model token usage for evaluation.
EvalError
Eval error details.
class EvalError(BaseModel)
Attributes
message
str-
Error message.
traceback
str-
Error traceback.
traceback_ansi
str-
Error traceback with ANSI color codes.
EvalSample
Sample from evaluation task.
class EvalSample(BaseModel)
Attributes
id
int | str-
Unique id for sample.
epoch
int-
Epoch number for sample.
input
str | list[ChatMessage]-
Sample input.
choices
list[str] | None-
Sample choices.
target
str | list[str]-
Sample target value(s)
sandbox
SandboxEnvironmentSpec | None-
Sandbox environment type and optional config file.
files
list[str] | None-
Files that go along with the sample (copied to SandboxEnvironment)
setup
str | None-
Setup script to run for sample (run within default SandboxEnvironment).
messages
list[ChatMessage]-
Chat conversation history for sample.
output
ModelOutput-
Model output from sample.
scores
dict[str, Score] | None-
Scores for sample.
metadata
dict[str, Any]-
Additional sample metadata.
store
dict[str, Any]-
State at end of sample execution.
events
list[Event]-
Events that occurred during sample execution.
model_usage
dict[str, ModelUsage]-
Model token usage for sample.
total_time
float | None-
Total time that the sample was running.
working_time
float | None-
Time spent working (model generation, sandbox calls, etc.)
uuid
str | None-
Globally unique identifier for sample run (exists for samples created in Inspect >= 0.3.70)
error
EvalError | None-
Error that halted sample.
error_retries
list[EvalError] | None-
Errors that were retried for this sample.
attachments
dict[str, str]-
Attachments referenced from messages and events.
Resolve attachments for a sample (replacing attachment://* references with attachment content) by passing
resolve_attachments=True
to log reading functions.
limit
EvalSampleLimit | None-
The limit that halted the sample.
Methods
- metadata_as
-
Pydantic model interface to metadata.
def metadata_as(self, metadata_cls: Type[MT]) -> MT
metadata_cls
Type[MT]-
Pydantic model type
- store_as
-
Pydantic model interface to the store.
def store_as(self, model_cls: Type[SMT], instance: str | None = None) -> SMT
model_cls
Type[SMT]-
Pydantic model type (must derive from StoreModel)
instance
str | None-
Optional instance name for store (enables multiple instances of a given StoreModel type within a single sample)
- summary
-
Summary of sample.
The summary excludes potentially large fields like messages, output, events, store, and metadata so that it is always fast to load.
If there are images, audio, or video in the input, they are replaced with a placeholder.
def summary(self) -> EvalSampleSummary
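A sketch of metadata_as() with a hypothetical Pydantic model (the log path and metadata field are illustrative):
from pydantic import BaseModel
from inspect_ai.log import read_eval_log
class TaskMeta(BaseModel):
    # Hypothetical metadata field used only for illustration.
    difficulty: str = "unknown"
log = read_eval_log("logs/run.eval")
for sample in log.samples or []:
    meta = sample.metadata_as(TaskMeta)
    print(sample.id, meta.difficulty)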
EvalSampleSummary
Summary information (including scoring) for a sample.
class EvalSampleSummary(BaseModel)
Attributes
id
int | str-
Unique id for sample.
epoch
int-
Epoch number for sample.
input
str | list[ChatMessage]-
Sample input (text inputs only).
target
str | list[str]-
Sample target value(s)
metadata
dict[str, Any]-
Sample metadata (scalar types only, strings truncated to 1k).
scores
dict[str, Score] | None-
Scores for sample (score values only, no answers, explanations, or metadata).
model_usage
dict[str, ModelUsage]-
Model token usage for sample.
total_time
float | None-
Total time that the sample was running.
working_time
float | None-
Time spent working (model generation, sandbox calls, etc.)
uuid
str | None-
Globally unique identifier for sample run (exists for samples created in Inspect >= 0.3.70)
error
str | None-
Error that halted sample.
limit
str | None-
Limit that halted the sample
retries
int | None-
Number of retries for the sample.
completed
bool-
Is the sample complete.
EvalSampleLimit
Limit encountered by sample.
class EvalSampleLimit(BaseModel)
Attributes
type
Literal['context', 'time', 'working', 'message', 'token', 'operator', 'custom']-
The type of limit
limit
int-
The limit value
EvalSampleReductions
Score reductions.
class EvalSampleReductions(BaseModel)
Attributes
scorer
str-
Name of the scorer.
reducer
str | None-
Name of the reducer.
samples
list[EvalSampleScore]-
List of reduced scores
EvalSampleScore
Score and sample_id scored.
class EvalSampleScore(Score)
Attributes
sample_id
str | int | None-
Sample ID.
Transcript API
transcript
Get the current Transcript.
def transcript() -> Transcript
Transcript
Transcript of events.
class Transcript
Methods
- info
-
Add an InfoEvent to the transcript (see the usage sketch after this methods list).
def info(self, data: JsonValue, *, source: str | None = None) -> None
data
JsonValue-
Data associated with the event.
source
str | None-
Optional event source.
- step
-
Context manager for recording StepEvent.
The
step()
context manager is deprecated and will be removed in a future version. Please use the span() context manager instead.
@contextlib.contextmanager
def step(self, name: str, type: str | None = None) -> Iterator[None]
name
str-
Step name.
type
str | None-
Optional step type.
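A sketch of recording a custom InfoEvent with Transcript.info() from code running inside an eval (e.g. a solver or tool); the payload and source name are illustrative:
from inspect_ai.log import transcript
# Must be called from within a running eval so that a current
# transcript exists for the active sample.
transcript().info({"phase": "retrieval", "documents": 3}, source="my_solver")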
Event
Event in a transcript.
Event: TypeAlias = Union[
    SampleInitEvent
    | SampleLimitEvent
    | SandboxEvent
    | StateEvent
    | StoreEvent
    | ModelEvent
    | ToolEvent
    | SandboxEvent
    | ApprovalEvent
    | InputEvent
    | ScoreEvent
    | ErrorEvent
    | LoggerEvent
    | InfoEvent
    | SpanBeginEvent
    | SpanEndEvent
    | StepEvent
    | SubtaskEvent,
]
event_tree
Build a tree representation of a sequence of events.
Organize events hierarchically into event spans.
def event_tree(events: Sequence[Event]) -> EventTree
event_sequence
Flatten a span forest back into a properly ordered sequence.
def event_sequence(tree: EventTree) -> Iterable[Event]
tree
EventTree-
Event tree
EventTree
Tree of events (has individual events and event spans).
EventTree: TypeAlias = list[EventNode]
EventNode
Node in an event tree.
= "SpanNode" | Event EventNode: TypeAlias
SpanNode
Event tree node representing a span of events.
@dataclass
class SpanNode
Attributes
id
str-
Span id.
parent_id
str | None-
Parent span id.
type
str | None-
Optional ‘type’ field for span.
name
str-
Span name.
begin
SpanBeginEvent-
Span begin event.
end
SpanEndEvent | None-
Span end event (if any).
children
list[EventNode]-
Children in the span.
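A sketch walking a span tree built from a sample's events (log path illustrative); span nodes and plain events can be distinguished with isinstance:
from inspect_ai.log import SpanNode, event_sequence, event_tree, read_eval_log
log = read_eval_log("logs/run.eval")
if log.samples:
    tree = event_tree(log.samples[0].events)
    for node in tree:
        if isinstance(node, SpanNode):
            print("span:", node.name, "with", len(node.children), "children")
        else:
            print("event:", node.event)
    # Flatten the tree back into a properly ordered event sequence.
    events = list(event_sequence(tree))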
SampleInitEvent
Beginning of processing a Sample.
class SampleInitEvent(BaseEvent)
Attributes
event
Literal['sample_init']-
Event type.
sample
Sample-
Sample.
state
JsonValue-
Initial state.
SampleLimitEvent
The sample was unable to finish processing due to a limit
class SampleLimitEvent(BaseEvent)
Attributes
event
Literal['sample_limit']-
Event type.
type
Literal['message', 'time', 'working', 'token', 'operator', 'custom']-
Type of limit that halted processing
message
str-
A message associated with this limit
limit
int | None-
The limit value (if any)
StateEvent
Change to the current TaskState
class StateEvent(BaseEvent)
Attributes
event
Literal['state']-
Event type.
changes
list[JsonChange]-
List of changes to the TaskState
StoreEvent
Change to data within the current Store.
class StoreEvent(BaseEvent)
Attributes
event
Literal['store']-
Event type.
changes
list[JsonChange]-
List of changes to the Store.
ModelEvent
Call to a language model.
class ModelEvent(BaseEvent)
Attributes
event
Literal['model']-
Event type.
model
str-
Model name.
role
str | None-
Model role.
input
list[ChatMessage]-
Model input (list of messages).
tools
list[ToolInfo]-
Tools available to the model.
tool_choice
ToolChoice-
Directive to the model which tools to prefer.
config
GenerateConfig-
Generate config used for call to model.
output
ModelOutput-
Output from model.
retries
int | None-
Retries for the model API request.
error
str | None-
Error which occurred during model call.
cache
Literal['read', 'write'] | None-
Was this a cache read or write.
call
ModelCall | None-
Raw call made to model API.
completed
datetime | None-
Time that model call completed (see
timestamp
for started)
working_time
float | None-
Working time for model call that succeeded (i.e. was not retried).
ToolEvent
Call to a tool.
class ToolEvent(BaseEvent)
Attributes
event
Literal['tool']-
Event type.
type
Literal['function']-
Type of tool call (currently only ‘function’)
id
str-
Unique identifier for tool call.
function
str-
Function called.
arguments
dict[str, JsonValue]-
Arguments to function.
internal
JsonValue | None-
Model provider specific payload - typically used to aid transformation back to model types.
view
ToolCallContent | None-
Custom view of tool call input.
result
ToolResult-
Function return value.
truncated
tuple[int, int] | None-
Bytes truncated (from,to) if truncation occurred
error
ToolCallError | None-
Error that occurred during tool call.
completed
datetime | None-
Time that tool call completed (see
timestamp
for started)
working_time
float | None-
Working time for tool call (i.e. time not spent waiting on semaphores).
agent
str | None-
Name of agent if the tool call was an agent handoff.
failed
bool | None-
Did the tool call fail with a hard error?
cancelled
bool-
Was the task cancelled?
SandboxEvent
Sandbox execution or I/O
class SandboxEvent(BaseEvent)
Attributes
event
Literal['sandbox']-
Event type
action
Literal['exec', 'read_file', 'write_file']-
Sandbox action
cmd
str | None-
Command (for exec)
options
dict[str, JsonValue] | None-
Options (for exec)
file
str | None-
File (for read_file and write_file)
input
str | None-
Input (for cmd and write_file). Truncated to 100 lines.
result
int | None-
Result (for exec)
output
str | None-
Output (for exec and read_file). Truncated to 100 lines.
completed
datetime | None-
Time that sandbox action completed (see
timestamp
for started)
ApprovalEvent
Tool approval.
class ApprovalEvent(BaseEvent)
Attributes
event
Literal['approval']-
Event type
message
str-
Message generated by model along with tool call.
call
ToolCall-
Tool call being approved.
view
ToolCallView | None-
View presented for approval.
approver
str-
Approver name.
decision
Literal['approve', 'modify', 'reject', 'escalate', 'terminate']-
Decision of approver.
modified
ToolCall | None-
Modified tool call for decision ‘modify’.
explanation
str | None-
Explanation for decision.
InputEvent
Input screen interaction.
class InputEvent(BaseEvent)
Attributes
event
Literal['input']-
Event type.
input
str-
Input interaction (plain text).
input_ansi
str-
Input interaction (ANSI).
ErrorEvent
Event with sample error.
class ErrorEvent(BaseEvent)
Attributes
event
Literal['error']-
Event type.
error
EvalError-
Sample error
LoggerEvent
Log message recorded with Python logger.
class LoggerEvent(BaseEvent)
Attributes
event
Literal['logger']-
Event type.
message
LoggingMessage-
Logging message
LoggingLevel
Logging level.
LoggingLevel = Literal[
    "debug", "trace", "http", "sandbox", "info", "warning", "error", "critical"
]
LoggingMessage
Message written to Python log.
class LoggingMessage(BaseModel)
Attributes
name
str | None-
Logger name (e.g. ‘httpx’)
level
LoggingLevel-
Logging level.
message
str-
Log message.
created
float-
Message created time.
filename
str-
Logged from filename.
module
str-
Logged from module.
lineno
int-
Logged from line number.
InfoEvent
Event with custom info/data.
class InfoEvent(BaseEvent)
Attributes
event
Literal['info']-
Event type.
source
str | None-
Optional source for info event.
data
JsonValue-
Data provided with event.
SpanBeginEvent
Mark the beginning of a transcript span.
class SpanBeginEvent(BaseEvent)
Attributes
event
Literal['span_begin']-
Event type.
id
str-
Unique identifier for span.
parent_id
str | None-
Identifier for parent span.
type
str | None-
Optional ‘type’ field for span.
name
str-
Span name.
SpanEndEvent
Mark the end of a transcript span.
class SpanEndEvent(BaseEvent)
Attributes
event
Literal['span_end']-
Event type.
id
str-
Unique identifier for span.
SubtaskEvent
Subtask spawned.
class SubtaskEvent(BaseEvent)
Attributes
event
Literal['subtask']-
Event type.
name
str-
Name of subtask function.
type
str | None-
Type of subtask
input
dict[str, Any]-
Subtask function inputs.
result
Any-
Subtask function result.
completed
datetime | None-
Time that subtask completed (see
timestamp
for started)
working_time
float | None-
Working time for subtask (i.e. time not spent waiting on semaphores or model retries).