shared

package

Version: v1.2.10 (latest) | Published: Mar 23, 2026 | License: MIT | Imports: 10 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/harvard-cns/orla

Links

Open Source Insights

Documentation ¶

Overview ¶

Package shared provides common types and helpers for DAG-Math memory evaluation experiments.

Index ¶

Constants
Variables
func BuildStagePrompt(problem DAGMathProblem, step DAGMathStep, depResults map[int]string) string
func LogDeferredError(fn func() error)
type DAGMathDataset
- func LoadDataset() (*DAGMathDataset, error)
type DAGMathProblem
type DAGMathStep
type RunMetrics
type RunMetricsRecorder
- func NewRunMetricsRecorder(experimentName string) *RunMetricsRecorder
- func (r *RunMetricsRecorder) AddWorkflow(wf WorkflowMetrics)
- func (r *RunMetricsRecorder) BeginRun()
- func (r *RunMetricsRecorder) EndRun()
- func (r *RunMetricsRecorder) Write(path string) error
type RunResults
type RunResultsRecorder
- func NewRunResultsRecorder(experimentName string) *RunResultsRecorder
- func (r *RunResultsRecorder) AddWorkflow(wf WorkflowResult)
- func (r *RunResultsRecorder) Write(path string) error
type StageMetrics
type StepResult
type WorkflowMetrics
type WorkflowResult

Constants ¶

View Source

// Shared configuration constants. How each value is consumed is not visible
// in this listing; notes marked "presumably" should be confirmed against the
// call sites.
const (
	// MaxOutputTokens is presumably the per-request cap on generated tokens
	// — TODO confirm against callers.
	MaxOutputTokens = 256

	// OrlaURL is an HTTP base URL (host "orla", port 8081), presumably the
	// Orla service endpoint — confirm against callers.
	OrlaURL     = "http://orla:8081"
	// SGLangURL is an HTTP base URL (host "sglang", port 30000, path "/v1"),
	// presumably the SGLang backend endpoint — confirm against callers.
	SGLangURL   = "http://sglang:30000/v1"
	// DatasetRoot is an absolute directory path; presumably the dataset root
	// that LoadDataset opens — confirm.
	DatasetRoot = "/dataset/test"
	// OutputPath is the default results file path; RunResultsRecorder.Write
	// is documented to use it when its path argument is empty.
	OutputPath  = "/output/results.json"
	// MetricsPath is the default metrics file path; RunMetricsRecorder.Write
	// is documented to use it when its path argument is empty.
	MetricsPath = "/output/metrics.json"
)

View Source

// SystemPrompt instructs the model to continue the mathematical reasoning chain.
// NOTE(review): the 460-byte literal is elided in this rendering, so its exact
// wording cannot be checked here.
const SystemPrompt = `` /* 460-byte string literal not displayed */

SystemPrompt instructs the model to continue the mathematical reasoning chain.

Variables ¶

View Source

// MaxInstances is set once, at package initialization, to the result of the
// unexported helper maxInstancesFromEnv.
// NOTE(review): the helper is not visible here; judging by its name it reads
// an environment variable, and the value presumably limits how many problem
// instances are processed — confirm the variable name, default and meaning.
var MaxInstances = maxInstancesFromEnv()

Functions ¶

func BuildStagePrompt ¶

func BuildStagePrompt(problem DAGMathProblem, step DAGMathStep, depResults map[int]string) string

BuildStagePrompt constructs the user message for a single DAG-Math step. Order is chosen for KV cache reuse: problem first (shared), then previous steps (shared among siblings), then current step (unique per stage).

func LogDeferredError ¶

func LogDeferredError(fn func() error)

Types ¶

type DAGMathDataset ¶

// DAGMathDataset is the loaded set of problems.
type DAGMathDataset struct {
	// Problems holds the loaded problems. Ordering is not specified in this
	// listing — confirm before relying on it.
	Problems []DAGMathProblem
}

DAGMathDataset is the loaded set of problems.

func LoadDataset ¶

func LoadDataset() (*DAGMathDataset, error)

LoadDataset opens the dataset root and loads all .json problem files.

type DAGMathProblem ¶

// DAGMathProblem is one problem from the DAG-MATH-Formatted-CoT dataset.
// Each field maps to the JSON key named in its struct tag.
type DAGMathProblem struct {
	// ProblemID is the problem's integer identifier ("problem_id").
	ProblemID   int           `json:"problem_id"`
	// ProblemText is the problem statement ("problem_text").
	ProblemText string        `json:"problem_text"`
	// FinalAnswer is presumably the reference answer, kept as a string
	// ("final_answer") — confirm against the dataset.
	FinalAnswer string        `json:"final_answer"`
	// Difficulty is a numeric difficulty score ("difficulty"); its scale is
	// not visible here.
	Difficulty  float64       `json:"difficulty"`
	// Domain is a list of domain labels ("domain"); presumably subject-area
	// tags — confirm against the dataset.
	Domain      []string      `json:"domain"`
	// Steps are the solution steps, i.e. the nodes of the problem's DAG
	// ("steps").
	Steps       []DAGMathStep `json:"steps"`
}

DAGMathProblem is one problem from the DAG-MATH-Formatted-CoT dataset.

type DAGMathStep ¶

// DAGMathStep is one step (node) in a DAG-Math solution.
// Each field maps to the JSON key named in its struct tag.
type DAGMathStep struct {
	// StepID is the step's integer identifier ("step_id").
	StepID               int    `json:"step_id"`
	// Edge is a text field ("edge").
	// NOTE(review): presumably the reasoning that leads to this step's
	// conclusion — confirm against the dataset format.
	Edge                 string `json:"edge"`
	// DirectDependentSteps lists step IDs directly linked to this step
	// ("direct_dependent_steps").
	// NOTE(review): presumably the steps this one depends on (its
	// prerequisites) rather than the steps that depend on it — TODO confirm
	// the direction.
	DirectDependentSteps []int  `json:"direct_dependent_steps"`
	// Node is a text field ("node").
	// NOTE(review): presumably the statement or conclusion reached at this
	// step — confirm against the dataset format.
	Node                 string `json:"node"`
}

DAGMathStep is one step (node) in a DAG-Math solution.

type RunMetrics ¶

// RunMetrics is the full run output: run-level timing and token totals plus
// the metrics of each workflow. It is serialized with the JSON keys named in
// the struct tags.
type RunMetrics struct {
	// ExperimentName identifies the experiment that produced the run.
	ExperimentName        string            `json:"experiment_name"`
	// StartTimeMs is the run's start timestamp in milliseconds
	// (presumably Unix epoch — confirm).
	StartTimeMs           int64             `json:"start_time_ms"`
	// EndTimeMs is the run's end timestamp in milliseconds
	// (presumably Unix epoch — confirm).
	EndTimeMs             int64             `json:"end_time_ms"`
	// TotalDurationMs is the run's duration in milliseconds
	// (presumably EndTimeMs - StartTimeMs — confirm).
	TotalDurationMs       int64             `json:"total_duration_ms"`
	// WorkflowsCount is the number of workflows
	// (presumably len(Workflows) — confirm).
	WorkflowsCount        int               `json:"workflows_count"`
	// TotalPromptTokens is the prompt-token total for the run
	// (presumably summed over Workflows — confirm).
	TotalPromptTokens     int               `json:"total_prompt_tokens"`
	// TotalCompletionTokens is the completion-token total for the run
	// (presumably summed over Workflows — confirm).
	TotalCompletionTokens int               `json:"total_completion_tokens"`
	// Workflows holds the per-workflow metrics.
	Workflows             []WorkflowMetrics `json:"workflows"`
}

RunMetrics is the full metrics output for a run: run-level timing and token totals plus the metrics of each workflow.

type RunMetricsRecorder ¶

// RunMetricsRecorder records timings for a run. Documented as thread-safe for
// concurrent AddWorkflow calls; the synchronization lives in unexported
// fields not shown here.
type RunMetricsRecorder struct {
	// ExperimentName identifies the experiment; NewRunMetricsRecorder takes
	// an experimentName argument that presumably populates it — confirm.
	ExperimentName string
	// TotalWorkflows is an exported count the caller may set.
	// NOTE(review): presumably the expected number of workflows, used by
	// AddWorkflow's progress logging — confirm.
	TotalWorkflows int
	// contains filtered or unexported fields
}

RunMetricsRecorder records per-workflow metrics (timings and token usage) for a run. Thread-safe for concurrent AddWorkflow calls.

func NewRunMetricsRecorder ¶

func NewRunMetricsRecorder(experimentName string) *RunMetricsRecorder

func (*RunMetricsRecorder) AddWorkflow ¶

func (r *RunMetricsRecorder) AddWorkflow(wf WorkflowMetrics)

AddWorkflow appends workflow metrics, logs progress, and flushes the metrics file. Thread-safe.

func (*RunMetricsRecorder) BeginRun ¶

func (r *RunMetricsRecorder) BeginRun()

func (*RunMetricsRecorder) EndRun ¶

func (r *RunMetricsRecorder) EndRun()

func (*RunMetricsRecorder) Write ¶

func (r *RunMetricsRecorder) Write(path string) error

Write writes the collected metrics to path. Uses MetricsPath if path is empty. Thread-safe.

type RunResults ¶

// RunResults is the full run output (prompts + responses), serialized with
// the JSON keys named in the struct tags.
type RunResults struct {
	// ExperimentName identifies the experiment that produced the results.
	ExperimentName string           `json:"experiment_name"`
	// Workflows holds one result entry per recorded workflow.
	Workflows      []WorkflowResult `json:"workflows"`
}

RunResults is the full run output (prompts + responses).

type RunResultsRecorder ¶

// RunResultsRecorder records workflow results and flushes to disk. Documented
// as thread-safe; the synchronization lives in unexported fields not shown
// here.
type RunResultsRecorder struct {
	// ExperimentName identifies the experiment; NewRunResultsRecorder takes
	// an experimentName argument that presumably populates it — confirm.
	ExperimentName string
	// contains filtered or unexported fields
}

RunResultsRecorder records workflow results and flushes to disk. Thread-safe.

func NewRunResultsRecorder ¶

func NewRunResultsRecorder(experimentName string) *RunResultsRecorder

func (*RunResultsRecorder) AddWorkflow ¶

func (r *RunResultsRecorder) AddWorkflow(wf WorkflowResult)

AddWorkflow appends workflow results and flushes to disk. Thread-safe.

func (*RunResultsRecorder) Write ¶

func (r *RunResultsRecorder) Write(path string) error

Write writes the collected results to path. Uses OutputPath if path is empty. Thread-safe.

type StageMetrics ¶

// StageMetrics is the timing and token usage for one stage within a workflow.
// Fields tagged omitempty are left out of the JSON output when they are zero.
type StageMetrics struct {
	// StepID is the ID of the step this stage ran; presumably matches
	// DAGMathStep.StepID — confirm.
	StepID           int   `json:"step_id"`
	// StartTimeMs is the stage's start timestamp in milliseconds
	// (presumably Unix epoch — confirm).
	StartTimeMs      int64 `json:"start_time_ms"`
	// EndTimeMs is the stage's end timestamp in milliseconds
	// (presumably Unix epoch — confirm).
	EndTimeMs        int64 `json:"end_time_ms"`
	// DurationMs is the stage's duration in milliseconds.
	DurationMs       int64 `json:"duration_ms"`
	// PromptTokens is the stage's prompt-token count; omitted when zero.
	PromptTokens     int   `json:"prompt_tokens,omitempty"`
	// CompletionTokens is the stage's completion-token count; omitted when
	// zero.
	CompletionTokens int   `json:"completion_tokens,omitempty"`
	// QueueWaitMs is presumably the time the request spent queued, in
	// milliseconds — confirm; omitted when zero.
	QueueWaitMs      int64 `json:"queue_wait_ms,omitempty"`
	// BackendLatencyMs is presumably the latency of the backend call, in
	// milliseconds — confirm; omitted when zero.
	BackendLatencyMs int64 `json:"backend_latency_ms,omitempty"`
	// TTFTMs is presumably time-to-first-token, in milliseconds — confirm;
	// omitted when zero.
	TTFTMs           int64 `json:"ttft_ms,omitempty"`
	// TPOTMs is presumably time-per-output-token, in milliseconds — confirm;
	// omitted when zero.
	TPOTMs           int64 `json:"tpot_ms,omitempty"`
}

StageMetrics is the timing and token usage for one stage within a workflow.

type StepResult ¶

// StepResult is the prompt and model response for one DAG-Math step.
type StepResult struct {
	// StepID is the ID of the step; presumably matches DAGMathStep.StepID —
	// confirm.
	StepID   int    `json:"step_id"`
	// Prompt is the prompt text recorded for the step.
	Prompt   string `json:"prompt"`
	// Response is the model's response text for the step.
	Response string `json:"response"`
}

StepResult is the prompt and model response for one DAG-Math step.

type WorkflowMetrics ¶

// WorkflowMetrics is the timing and token usage for one DAG-Math workflow,
// serialized with the JSON keys named in the struct tags.
type WorkflowMetrics struct {
	// ProblemID is the ID of the problem the workflow ran; presumably
	// matches DAGMathProblem.ProblemID — confirm.
	ProblemID             int            `json:"problem_id"`
	// NumStages is the number of stages in the workflow
	// (presumably len(Stages) — confirm).
	NumStages             int            `json:"num_stages"`
	// Difficulty is a numeric difficulty score; presumably copied from the
	// problem — confirm.
	Difficulty            float64        `json:"difficulty"`
	// StartTimeMs is the workflow's start timestamp in milliseconds
	// (presumably Unix epoch — confirm).
	StartTimeMs           int64          `json:"start_time_ms"`
	// EndTimeMs is the workflow's end timestamp in milliseconds
	// (presumably Unix epoch — confirm).
	EndTimeMs             int64          `json:"end_time_ms"`
	// DurationMs is the workflow's duration in milliseconds.
	DurationMs            int64          `json:"duration_ms"`
	// TotalPromptTokens is the workflow's prompt-token total
	// (presumably summed over Stages — confirm).
	TotalPromptTokens     int            `json:"total_prompt_tokens"`
	// TotalCompletionTokens is the workflow's completion-token total
	// (presumably summed over Stages — confirm).
	TotalCompletionTokens int            `json:"total_completion_tokens"`
	// Stages holds the per-stage metrics.
	Stages                []StageMetrics `json:"stages"`
}

WorkflowMetrics is the timing and token usage for one DAG-Math workflow.

type WorkflowResult ¶

// WorkflowResult holds the full results for one DAG-Math workflow: the
// recorded prompt and response of each step.
type WorkflowResult struct {
	// ProblemID is the ID of the problem the workflow ran; presumably
	// matches DAGMathProblem.ProblemID — confirm.
	ProblemID int          `json:"problem_id"`
	// Steps holds the per-step prompts and responses.
	Steps     []StepResult `json:"steps"`
}

WorkflowResult holds the full results (per-step prompts and responses) for one DAG-Math workflow.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL