shared

package
v1.2.10 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 23, 2026 License: MIT Imports: 10 Imported by: 0

Documentation

Overview

Package shared provides common types and helpers for DAG-Math memory evaluation experiments.

Index

Constants

View Source
// Fixed configuration shared by the experiments: a token limit, two service
// URLs, and the absolute paths used for dataset input and result/metrics output.
const (
	// MaxOutputTokens is a token limit of 256.
	// NOTE(review): presumably the per-request completion cap — confirm at the call site.
	MaxOutputTokens = 256

	// OrlaURL is the HTTP address of the "orla" host on port 8081.
	OrlaURL     = "http://orla:8081"
	// SGLangURL is the HTTP address of the "sglang" host on port 30000, under the /v1 path.
	// NOTE(review): the /v1 suffix suggests an OpenAI-compatible API — confirm.
	SGLangURL   = "http://sglang:30000/v1"
	// DatasetRoot is the dataset directory; LoadDataset is documented as opening
	// the dataset root and loading all .json problem files.
	DatasetRoot = "/dataset/test"
	// OutputPath is the path RunResultsRecorder.Write is documented to use when
	// it is called with an empty path.
	OutputPath  = "/output/results.json"
	// MetricsPath is the path RunMetricsRecorder.Write is documented to use when
	// it is called with an empty path.
	MetricsPath = "/output/metrics.json"
)
View Source
const SystemPrompt = `` /* 460-byte string literal not displayed */

SystemPrompt instructs the model to continue the mathematical reasoning chain.

Variables

View Source
// MaxInstances is assigned once, during package initialization, from the value
// returned by the unexported helper maxInstancesFromEnv (not shown here).
// NOTE(review): the helper's name suggests an environment-variable override that
// bounds how many instances are processed — confirm the variable name and default.
var MaxInstances = maxInstancesFromEnv()

Functions

func BuildStagePrompt

func BuildStagePrompt(problem DAGMathProblem, step DAGMathStep, depResults map[int]string) string

BuildStagePrompt constructs the user message for a single DAG-Math step. The sections are ordered to maximize KV cache reuse: the problem comes first (shared), then the previous steps (shared among sibling steps), and finally the current step (unique to each stage).

func LogDeferredError

func LogDeferredError(fn func() error)

Types

type DAGMathDataset

// DAGMathDataset is the set of problems returned by LoadDataset.
type DAGMathDataset struct {
	// Problems holds the loaded problems.
	// NOTE(review): ordering is not stated in the visible docs — confirm before relying on it.
	Problems []DAGMathProblem
}

DAGMathDataset is the loaded set of problems.

func LoadDataset

func LoadDataset() (*DAGMathDataset, error)

LoadDataset opens the dataset root and loads all .json problem files.

type DAGMathProblem

// DAGMathProblem is a single dataset problem together with its solution steps.
// Every field is mapped to a snake_case JSON key by its struct tag.
type DAGMathProblem struct {
	// ProblemID is the problem's integer identifier (JSON "problem_id").
	// NOTE(review): presumably the same ID reported in WorkflowMetrics.ProblemID
	// and WorkflowResult.ProblemID — confirm.
	ProblemID   int           `json:"problem_id"`
	// ProblemText is the problem statement (JSON "problem_text").
	ProblemText string        `json:"problem_text"`
	// FinalAnswer is stored as a string (JSON "final_answer").
	// NOTE(review): presumably the reference answer from the dataset — confirm.
	FinalAnswer string        `json:"final_answer"`
	// Difficulty is a floating-point score (JSON "difficulty").
	// NOTE(review): scale and range are not visible here — confirm against the dataset.
	Difficulty  float64       `json:"difficulty"`
	// Domain is a list of strings (JSON "domain").
	// NOTE(review): presumably subject-area labels — confirm.
	Domain      []string      `json:"domain"`
	// Steps are the steps (nodes) of the problem's DAG-Math solution (JSON "steps").
	Steps       []DAGMathStep `json:"steps"`
}

DAGMathProblem is one problem from the DAG-MATH-Formatted-CoT dataset.

type DAGMathStep

// DAGMathStep is a single node of a DAG-Math solution, as decoded from JSON.
type DAGMathStep struct {
	// StepID is the step's integer identifier (JSON "step_id").
	StepID               int    `json:"step_id"`
	// Edge is a text field (JSON "edge").
	// NOTE(review): presumably the reasoning that leads to this step's node —
	// confirm against the dataset schema.
	Edge                 string `json:"edge"`
	// DirectDependentSteps is a list of integers (JSON "direct_dependent_steps").
	// NOTE(review): presumably StepIDs of related steps; whether these are the
	// steps this one depends on or the steps that depend on it is not visible
	// here — confirm before building the DAG.
	DirectDependentSteps []int  `json:"direct_dependent_steps"`
	// Node is a text field (JSON "node").
	// NOTE(review): presumably the statement or result established by this step — confirm.
	Node                 string `json:"node"`
}

DAGMathStep is one step (node) in a DAG-Math solution.

type RunMetrics

// RunMetrics is the JSON-serializable metrics document for a whole run:
// run-level timing and token totals plus one entry per workflow.
type RunMetrics struct {
	// ExperimentName labels the run (JSON "experiment_name").
	ExperimentName        string            `json:"experiment_name"`
	// StartTimeMs and EndTimeMs are timestamps in milliseconds.
	// NOTE(review): presumably Unix epoch milliseconds — confirm.
	StartTimeMs           int64             `json:"start_time_ms"`
	EndTimeMs             int64             `json:"end_time_ms"`
	// TotalDurationMs is the run duration in milliseconds.
	// NOTE(review): presumably EndTimeMs - StartTimeMs — confirm.
	TotalDurationMs       int64             `json:"total_duration_ms"`
	// WorkflowsCount is a workflow count.
	// NOTE(review): presumably len(Workflows) — confirm.
	WorkflowsCount        int               `json:"workflows_count"`
	// TotalPromptTokens and TotalCompletionTokens are run-level token totals.
	// NOTE(review): presumably summed over Workflows — confirm.
	TotalPromptTokens     int               `json:"total_prompt_tokens"`
	TotalCompletionTokens int               `json:"total_completion_tokens"`
	// Workflows holds the per-workflow metrics.
	Workflows             []WorkflowMetrics `json:"workflows"`
}

RunMetrics is the full metrics output for a run: run-level timestamps, duration, and token totals, plus the per-workflow metrics.

type RunMetricsRecorder

// RunMetricsRecorder accumulates run metrics; create one with NewRunMetricsRecorder.
// NOTE(review): the type is documented as thread-safe, so the unexported fields
// presumably include a lock — avoid copying values of this type; confirm.
type RunMetricsRecorder struct {
	// ExperimentName labels the run.
	// NOTE(review): presumably set from NewRunMetricsRecorder's experimentName argument — confirm.
	ExperimentName string
	// TotalWorkflows is an exported workflow count.
	// NOTE(review): presumably the expected total, used by AddWorkflow's progress
	// logging, and set by the caller — confirm.
	TotalWorkflows int
	// contains filtered or unexported fields
}

RunMetricsRecorder records timings for a run. Thread-safe for concurrent AddWorkflow calls.

func NewRunMetricsRecorder

func NewRunMetricsRecorder(experimentName string) *RunMetricsRecorder

func (*RunMetricsRecorder) AddWorkflow

func (r *RunMetricsRecorder) AddWorkflow(wf WorkflowMetrics)

AddWorkflow appends workflow metrics, logs progress, and flushes the metrics file. Thread-safe.

func (*RunMetricsRecorder) BeginRun

func (r *RunMetricsRecorder) BeginRun()

func (*RunMetricsRecorder) EndRun

func (r *RunMetricsRecorder) EndRun()

func (*RunMetricsRecorder) Write

func (r *RunMetricsRecorder) Write(path string) error

Write writes the collected metrics to path. Uses MetricsPath if path is empty. Thread-safe.

type RunResults

// RunResults is the JSON-serializable results document for a whole run.
type RunResults struct {
	// ExperimentName labels the run (JSON "experiment_name").
	ExperimentName string           `json:"experiment_name"`
	// Workflows holds one WorkflowResult per recorded workflow (JSON "workflows").
	Workflows      []WorkflowResult `json:"workflows"`
}

RunResults is the full run output (prompts + responses).

type RunResultsRecorder

// RunResultsRecorder accumulates workflow results; create one with NewRunResultsRecorder.
// NOTE(review): the type is documented as thread-safe, so the unexported fields
// presumably include a lock — avoid copying values of this type; confirm.
type RunResultsRecorder struct {
	// ExperimentName labels the run.
	// NOTE(review): presumably set from NewRunResultsRecorder's experimentName argument — confirm.
	ExperimentName string
	// contains filtered or unexported fields
}

RunResultsRecorder records workflow results and flushes to disk. Thread-safe.

func NewRunResultsRecorder

func NewRunResultsRecorder(experimentName string) *RunResultsRecorder

func (*RunResultsRecorder) AddWorkflow

func (r *RunResultsRecorder) AddWorkflow(wf WorkflowResult)

AddWorkflow appends workflow results and flushes to disk. Thread-safe.

func (*RunResultsRecorder) Write

func (r *RunResultsRecorder) Write(path string) error

Write writes the collected results to path. Uses OutputPath if path is empty. Thread-safe.

type StageMetrics

// StageMetrics holds timing and token figures for one stage of a workflow.
// Fields tagged omitempty are left out of the JSON output when their value is zero.
type StageMetrics struct {
	// StepID identifies the stage (JSON "step_id").
	// NOTE(review): presumably the DAGMathStep.StepID this stage executed — confirm.
	StepID           int   `json:"step_id"`
	// StartTimeMs and EndTimeMs are timestamps in milliseconds.
	// NOTE(review): presumably Unix epoch milliseconds — confirm.
	StartTimeMs      int64 `json:"start_time_ms"`
	EndTimeMs        int64 `json:"end_time_ms"`
	// DurationMs is the stage duration in milliseconds.
	DurationMs       int64 `json:"duration_ms"`
	// PromptTokens and CompletionTokens are token counts; omitted when zero.
	PromptTokens     int   `json:"prompt_tokens,omitempty"`
	CompletionTokens int   `json:"completion_tokens,omitempty"`
	// QueueWaitMs and BackendLatencyMs are millisecond timings; omitted when zero.
	// NOTE(review): which component reports these is not visible here — confirm.
	QueueWaitMs      int64 `json:"queue_wait_ms,omitempty"`
	BackendLatencyMs int64 `json:"backend_latency_ms,omitempty"`
	// TTFTMs and TPOTMs are millisecond timings; omitted when zero.
	// NOTE(review): presumably time-to-first-token and time-per-output-token.
	// Being int64, they cannot hold sub-millisecond values — confirm this is intended.
	TTFTMs           int64 `json:"ttft_ms,omitempty"`
	TPOTMs           int64 `json:"tpot_ms,omitempty"`
}

StageMetrics is the timing and token usage for one stage within a workflow.

type StepResult

// StepResult pairs the prompt sent for one DAG-Math step with the model's response.
type StepResult struct {
	// StepID identifies the step (JSON "step_id").
	StepID   int    `json:"step_id"`
	// Prompt is the prompt text for the step (JSON "prompt").
	// NOTE(review): presumably the string produced by BuildStagePrompt — confirm.
	Prompt   string `json:"prompt"`
	// Response is the model's response text (JSON "response").
	Response string `json:"response"`
}

StepResult is the prompt and model response for one DAG-Math step.

type WorkflowMetrics

// WorkflowMetrics holds timing and token figures for one DAG-Math workflow,
// including a per-stage breakdown.
type WorkflowMetrics struct {
	// ProblemID identifies the workflow's problem (JSON "problem_id").
	// NOTE(review): presumably DAGMathProblem.ProblemID — confirm.
	ProblemID             int            `json:"problem_id"`
	// NumStages is a stage count.
	// NOTE(review): presumably len(Stages) — confirm.
	NumStages             int            `json:"num_stages"`
	// Difficulty is a floating-point score.
	// NOTE(review): presumably copied from DAGMathProblem.Difficulty — confirm.
	Difficulty            float64        `json:"difficulty"`
	// StartTimeMs and EndTimeMs are timestamps in milliseconds.
	// NOTE(review): presumably Unix epoch milliseconds — confirm.
	StartTimeMs           int64          `json:"start_time_ms"`
	EndTimeMs             int64          `json:"end_time_ms"`
	// DurationMs is the workflow duration in milliseconds.
	DurationMs            int64          `json:"duration_ms"`
	// TotalPromptTokens and TotalCompletionTokens are workflow-level token totals.
	// NOTE(review): presumably summed over Stages — confirm.
	TotalPromptTokens     int            `json:"total_prompt_tokens"`
	TotalCompletionTokens int            `json:"total_completion_tokens"`
	// Stages holds the per-stage metrics.
	Stages                []StageMetrics `json:"stages"`
}

WorkflowMetrics is the timing and token usage for one DAG-Math workflow.

type WorkflowResult

// WorkflowResult collects the per-step results of one DAG-Math workflow.
type WorkflowResult struct {
	// ProblemID identifies the workflow's problem (JSON "problem_id").
	// NOTE(review): presumably DAGMathProblem.ProblemID — confirm.
	ProblemID int          `json:"problem_id"`
	// Steps holds one StepResult per step (JSON "steps").
	Steps     []StepResult `json:"steps"`
}

WorkflowResult holds the full set of results (one StepResult per step) for one DAG-Math workflow.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL