Documentation ¶
Overview ¶
Package shared provides common types and helpers for DAG-Math memory evaluation experiments.
Index ¶
- Constants
- Variables
- func BuildStagePrompt(problem DAGMathProblem, step DAGMathStep, depResults map[int]string) string
- func LogDeferredError(fn func() error)
- type DAGMathDataset
- type DAGMathProblem
- type DAGMathStep
- type RunMetrics
- type RunMetricsRecorder
- type RunResults
- type RunResultsRecorder
- type StageMetrics
- type StepResult
- type WorkflowMetrics
- type WorkflowResult
Constants ¶
const (
	MaxOutputTokens = 256
	OrlaURL         = "http://orla:8081"
	SGLangURL       = "http://sglang:30000/v1"
	DatasetRoot     = "/dataset/test"
	OutputPath      = "/output/results.json"
	MetricsPath     = "/output/metrics.json"
)
const SystemPrompt = `` /* 460-byte string literal not displayed */
SystemPrompt instructs the model to continue the mathematical reasoning chain.
Variables ¶
var MaxInstances = maxInstancesFromEnv()
Functions ¶
func BuildStagePrompt ¶
func BuildStagePrompt(problem DAGMathProblem, step DAGMathStep, depResults map[int]string) string
BuildStagePrompt constructs the user message for a single DAG-Math step. The sections are ordered for KV cache reuse: the problem comes first (shared), then the previous steps (shared among siblings), and finally the current step (unique per stage).
func LogDeferredError ¶
func LogDeferredError(fn func() error)
Types ¶
type DAGMathDataset ¶
type DAGMathDataset struct {
Problems []DAGMathProblem
}
DAGMathDataset is the loaded set of problems.
func LoadDataset ¶
func LoadDataset() (*DAGMathDataset, error)
LoadDataset opens the dataset root and loads all .json problem files.
type DAGMathProblem ¶
type DAGMathProblem struct {
ProblemID int `json:"problem_id"`
ProblemText string `json:"problem_text"`
FinalAnswer string `json:"final_answer"`
Difficulty float64 `json:"difficulty"`
Domain []string `json:"domain"`
Steps []DAGMathStep `json:"steps"`
}
DAGMathProblem is one problem from the DAG-MATH-Formatted-CoT dataset.
type DAGMathStep ¶
type DAGMathStep struct {
StepID int `json:"step_id"`
Edge string `json:"edge"`
DirectDependentSteps []int `json:"direct_dependent_steps"`
Node string `json:"node"`
}
DAGMathStep is one step (node) in a DAG-Math solution.
type RunMetrics ¶
type RunMetrics struct {
ExperimentName string `json:"experiment_name"`
StartTimeMs int64 `json:"start_time_ms"`
EndTimeMs int64 `json:"end_time_ms"`
TotalDurationMs int64 `json:"total_duration_ms"`
WorkflowsCount int `json:"workflows_count"`
TotalPromptTokens int `json:"total_prompt_tokens"`
TotalCompletionTokens int `json:"total_completion_tokens"`
Workflows []WorkflowMetrics `json:"workflows"`
}
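RunMetrics is the full metrics output for a run: the experiment name, overall start/end times and duration, the workflow count, token totals, and the per-workflow metrics.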
type RunMetricsRecorder ¶
type RunMetricsRecorder struct {
ExperimentName string
TotalWorkflows int
// contains filtered or unexported fields
}
RunMetricsRecorder records timings for a run. Thread-safe for concurrent AddWorkflow calls.
func NewRunMetricsRecorder ¶
func NewRunMetricsRecorder(experimentName string) *RunMetricsRecorder
func (*RunMetricsRecorder) AddWorkflow ¶
func (r *RunMetricsRecorder) AddWorkflow(wf WorkflowMetrics)
AddWorkflow appends workflow metrics, logs progress, and flushes the metrics file. Thread-safe.
func (*RunMetricsRecorder) BeginRun ¶
func (r *RunMetricsRecorder) BeginRun()
func (*RunMetricsRecorder) EndRun ¶
func (r *RunMetricsRecorder) EndRun()
func (*RunMetricsRecorder) Write ¶
func (r *RunMetricsRecorder) Write(path string) error
Write writes the collected metrics to path. Uses MetricsPath if path is empty. Thread-safe.
type RunResults ¶
type RunResults struct {
ExperimentName string `json:"experiment_name"`
Workflows []WorkflowResult `json:"workflows"`
}
RunResults is the full run output (prompts + responses).
type RunResultsRecorder ¶
type RunResultsRecorder struct {
ExperimentName string
// contains filtered or unexported fields
}
RunResultsRecorder records workflow results and flushes to disk. Thread-safe.
func NewRunResultsRecorder ¶
func NewRunResultsRecorder(experimentName string) *RunResultsRecorder
func (*RunResultsRecorder) AddWorkflow ¶
func (r *RunResultsRecorder) AddWorkflow(wf WorkflowResult)
AddWorkflow appends workflow results and flushes to disk. Thread-safe.
func (*RunResultsRecorder) Write ¶
func (r *RunResultsRecorder) Write(path string) error
Write writes the collected results to path. Uses OutputPath if path is empty. Thread-safe.
type StageMetrics ¶
type StageMetrics struct {
StepID int `json:"step_id"`
StartTimeMs int64 `json:"start_time_ms"`
EndTimeMs int64 `json:"end_time_ms"`
DurationMs int64 `json:"duration_ms"`
PromptTokens int `json:"prompt_tokens,omitempty"`
CompletionTokens int `json:"completion_tokens,omitempty"`
QueueWaitMs int64 `json:"queue_wait_ms,omitempty"`
BackendLatencyMs int64 `json:"backend_latency_ms,omitempty"`
TTFTMs int64 `json:"ttft_ms,omitempty"`
TPOTMs int64 `json:"tpot_ms,omitempty"`
}
StageMetrics is the timing and token usage for one stage within a workflow.
type StepResult ¶
type StepResult struct {
StepID int `json:"step_id"`
Prompt string `json:"prompt"`
Response string `json:"response"`
}
StepResult is the prompt and model response for one DAG-Math step.
type WorkflowMetrics ¶
type WorkflowMetrics struct {
ProblemID int `json:"problem_id"`
NumStages int `json:"num_stages"`
Difficulty float64 `json:"difficulty"`
StartTimeMs int64 `json:"start_time_ms"`
EndTimeMs int64 `json:"end_time_ms"`
DurationMs int64 `json:"duration_ms"`
TotalPromptTokens int `json:"total_prompt_tokens"`
TotalCompletionTokens int `json:"total_completion_tokens"`
Stages []StageMetrics `json:"stages"`
}
WorkflowMetrics is the timing and token usage for one DAG-Math workflow.
type WorkflowResult ¶
type WorkflowResult struct {
ProblemID int `json:"problem_id"`
Steps []StepResult `json:"steps"`
}
WorkflowResult holds the full results for one DAG-Math workflow: the problem ID and its step results.