Documentation
¶
Index ¶
- func CheckOolongAnswer(expected, actual string) bool
- func EnsureDataset(datasetName string) (string, error)
- func ValidateTBLiteTaskName(name string) error
- type GSM8KExample
- type HotPotQAExample
- type OolongTask
- func FetchOolongTasksFromHuggingFace(limit int) ([]OolongTask, error)
- func FetchOolongTasksFromHuggingFaceRange(offset, limit int) ([]OolongTask, error)
- func LoadOolongTasksFromFile(path string) ([]OolongTask, error)
- func SampleOolongTasks() []OolongTask
- func SliceOolongTasks(tasks []OolongTask, offset, limit int) []OolongTask
- type SimpleDataset
- type TBLiteTask
- func FetchTBLiteTasksByNamesContext(ctx context.Context, split string, taskNames []string) ([]TBLiteTask, error)
- func FetchTBLiteTasksFromHuggingFace(limit int) ([]TBLiteTask, error)
- func FetchTBLiteTasksFromHuggingFaceContext(ctx context.Context, limit int) ([]TBLiteTask, error)
- func FetchTBLiteTasksFromHuggingFaceRange(split string, offset, limit int) ([]TBLiteTask, error)
- func FetchTBLiteTasksFromHuggingFaceRangeContext(ctx context.Context, split string, offset, limit int) ([]TBLiteTask, error)
- func LoadTBLiteTasksFromFile(path string) ([]TBLiteTask, error)
- func SliceTBLiteTasks(tasks []TBLiteTask, offset, limit int) []TBLiteTask
- type TBLiteTaskSelection
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CheckOolongAnswer ¶ added in v0.80.0
CheckOolongAnswer applies the same answer-matching logic used by the benchmark examples.
func EnsureDataset ¶
func ValidateTBLiteTaskName ¶ added in v0.81.0
ValidateTBLiteTaskName ensures task names are safe to materialize as a single directory beneath a benchmark root.
Types ¶
type GSM8KExample ¶
func LoadGSM8K ¶
func LoadGSM8K() ([]GSM8KExample, error)
type HotPotQAExample ¶
type HotPotQAExample struct {
ID string `json:"_id"`
SupportingFacts [][]interface{} `json:"supporting_facts"`
Context [][]interface{} `json:"context"`
Question string `json:"question"`
Answer string `json:"answer"`
Type string `json:"type"`
Level string `json:"level"`
}
func LoadHotpotQA ¶
func LoadHotpotQA() ([]HotPotQAExample, error)
type OolongTask ¶ added in v0.80.0
type OolongTask struct {
ID string `json:"id"`
ContextLen int `json:"context_len"`
Dataset string `json:"dataset"`
ContextWindowText string `json:"context_window_text"`
Question string `json:"question"`
TaskGroup string `json:"task_group"`
Task string `json:"task"`
Answer string `json:"answer"`
AnswerType string `json:"answer_type"`
TaskID string `json:"task_id"`
Context string `json:"context"`
}
OolongTask represents a single OOLONG benchmark task. It supports both the HuggingFace schema and the local example schema.
func FetchOolongTasksFromHuggingFace ¶ added in v0.80.0
func FetchOolongTasksFromHuggingFace(limit int) ([]OolongTask, error)
FetchOolongTasksFromHuggingFace loads OOLONG validation rows from the public datasets server.
func FetchOolongTasksFromHuggingFaceRange ¶ added in v0.80.0
func FetchOolongTasksFromHuggingFaceRange(offset, limit int) ([]OolongTask, error)
FetchOolongTasksFromHuggingFaceRange loads a deterministic slice of OOLONG validation rows.
func LoadOolongTasksFromFile ¶ added in v0.80.0
func LoadOolongTasksFromFile(path string) ([]OolongTask, error)
LoadOolongTasksFromFile loads OOLONG tasks from a JSON file.
func SampleOolongTasks ¶ added in v0.80.0
func SampleOolongTasks() []OolongTask
SampleOolongTasks returns embedded OOLONG-style tasks for local smoke testing.
func SliceOolongTasks ¶ added in v0.80.0
func SliceOolongTasks(tasks []OolongTask, offset, limit int) []OolongTask
SliceOolongTasks returns a deterministic slice from a task set.
func (OolongTask) Normalize ¶ added in v0.80.0
func (t OolongTask) Normalize() OolongTask
Normalize fills the canonical fields from alternate schema variants.
func (*OolongTask) UnmarshalJSON ¶ added in v0.80.0
func (t *OolongTask) UnmarshalJSON(data []byte) error
UnmarshalJSON accepts HuggingFace rows where id may be a string or a number.
type SimpleDataset ¶ added in v0.40.0
type SimpleDataset struct {
// contains filtered or unexported fields
}
SimpleDataset implements core.Dataset interface for testing and examples.
func NewSimpleDataset ¶ added in v0.40.0
func NewSimpleDataset(examples []core.Example) *SimpleDataset
NewSimpleDataset creates a new SimpleDataset with the given examples.
func (*SimpleDataset) Next ¶ added in v0.40.0
func (sd *SimpleDataset) Next() (core.Example, bool)
Next returns the next example in the dataset.
func (*SimpleDataset) Reset ¶ added in v0.40.0
func (sd *SimpleDataset) Reset()
Reset resets the dataset iterator to the beginning.
type TBLiteTask ¶ added in v0.80.0
type TBLiteTask struct {
TaskName string `json:"task_name"`
Instruction string `json:"instruction"`
DockerImage string `json:"docker_image"`
Category string `json:"category"`
Difficulty string `json:"difficulty"`
Tags []string `json:"tags,omitempty"`
AgentTimeoutSec int `json:"agent_timeout_sec"`
TestTimeoutSec int `json:"test_timeout_sec"`
EnvironmentTar string `json:"environment_tar"`
TestsTar string `json:"tests_tar"`
TestScript string `json:"test_sh"`
}
TBLiteTask represents a single OpenThoughts-TBLite benchmark task. It supports both HuggingFace row payloads and local JSON fixtures.
func FetchTBLiteTasksByNamesContext ¶ added in v0.81.0
func FetchTBLiteTasksByNamesContext(ctx context.Context, split string, taskNames []string) ([]TBLiteTask, error)
FetchTBLiteTasksByNamesContext resolves named tasks from the datasets server while preserving the requested order.
func FetchTBLiteTasksFromHuggingFace ¶ added in v0.80.0
func FetchTBLiteTasksFromHuggingFace(limit int) ([]TBLiteTask, error)
FetchTBLiteTasksFromHuggingFace loads TBLite rows from the public datasets server.
func FetchTBLiteTasksFromHuggingFaceContext ¶ added in v0.80.0
func FetchTBLiteTasksFromHuggingFaceContext(ctx context.Context, limit int) ([]TBLiteTask, error)
FetchTBLiteTasksFromHuggingFaceContext is the variant of FetchTBLiteTasksFromHuggingFace that accepts a context.Context; it loads TBLite rows from the public datasets server.
func FetchTBLiteTasksFromHuggingFaceRange ¶ added in v0.80.0
func FetchTBLiteTasksFromHuggingFaceRange(split string, offset, limit int) ([]TBLiteTask, error)
FetchTBLiteTasksFromHuggingFaceRange loads a deterministic slice of TBLite rows.
func FetchTBLiteTasksFromHuggingFaceRangeContext ¶ added in v0.80.0
func FetchTBLiteTasksFromHuggingFaceRangeContext(ctx context.Context, split string, offset, limit int) ([]TBLiteTask, error)
FetchTBLiteTasksFromHuggingFaceRangeContext is the variant of FetchTBLiteTasksFromHuggingFaceRange that accepts a context.Context; it loads a deterministic slice of TBLite rows.
func LoadTBLiteTasksFromFile ¶ added in v0.80.0
func LoadTBLiteTasksFromFile(path string) ([]TBLiteTask, error)
LoadTBLiteTasksFromFile loads TBLite tasks from a local JSON file.
func SliceTBLiteTasks ¶ added in v0.80.0
func SliceTBLiteTasks(tasks []TBLiteTask, offset, limit int) []TBLiteTask
SliceTBLiteTasks returns a deterministic slice from a task set.
func (TBLiteTask) DecodeEnvironmentArchive ¶ added in v0.80.0
func (t TBLiteTask) DecodeEnvironmentArchive() ([]byte, error)
DecodeEnvironmentArchive decodes the base64 environment tarball payload.
func (TBLiteTask) DecodeTestsArchive ¶ added in v0.80.0
func (t TBLiteTask) DecodeTestsArchive() ([]byte, error)
DecodeTestsArchive decodes the base64 tests tarball payload.
func (TBLiteTask) Normalize ¶ added in v0.80.0
func (t TBLiteTask) Normalize() TBLiteTask
Normalize fills defaults used by the benchmark harness.
func (*TBLiteTask) UnmarshalJSON ¶ added in v0.80.0
func (t *TBLiteTask) UnmarshalJSON(data []byte) error
UnmarshalJSON accepts HuggingFace rows where tags may be a JSON string and timeout fields may be numbers or strings.
type TBLiteTaskSelection ¶ added in v0.81.0
type TBLiteTaskSelection struct {
Label string `json:"label,omitempty"`
Split string `json:"split,omitempty"`
TaskNames []string `json:"task_names,omitempty"`
Tasks []TBLiteTask `json:"tasks,omitempty"`
}
TBLiteTaskSelection describes a curated benchmark slice. It can reference existing HuggingFace tasks by name and/or embed full local task payloads.
func LoadTBLiteTaskSelectionFromFile ¶ added in v0.81.0
func LoadTBLiteTaskSelectionFromFile(path string) (*TBLiteTaskSelection, error)
LoadTBLiteTaskSelectionFromFile loads a curated TBLite benchmark manifest. Supported JSON shapes:
- an array of task names: ["task-a", "task-b"]
- an array of full task payloads: [{...full task...}, {...full task...}]
- a selection object: {"label":"...", "split":"train", "task_names":[...], "tasks":[...]}