datasets

package
v0.83.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 22, 2026 License: MIT Imports: 18 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CheckOolongAnswer added in v0.80.0

func CheckOolongAnswer(expected, actual string) bool

CheckOolongAnswer applies the same answer-matching logic used by the benchmark examples.

func EnsureDataset

func EnsureDataset(datasetName string) (string, error)

func ValidateTBLiteTaskName added in v0.81.0

func ValidateTBLiteTaskName(name string) error

ValidateTBLiteTaskName ensures task names are safe to materialize as a single directory beneath a benchmark root.

Types

type GSM8KExample

// GSM8KExample is a single GSM8K record; LoadGSM8K returns a slice of these.
// Both fields are plain strings mapped to the JSON keys named in their tags.
type GSM8KExample struct {
	// Question is read from the "question" JSON key.
	Question string `json:"question"`
	// Answer is read from the "answer" JSON key.
	Answer   string `json:"answer"`
}

func LoadGSM8K

func LoadGSM8K() ([]GSM8KExample, error)

type HotPotQAExample

// HotPotQAExample is a single HotpotQA record; LoadHotpotQA returns a slice
// of these. Field names map to the JSON keys named in their tags.
type HotPotQAExample struct {
	// ID is read from the "_id" key (note the leading underscore).
	ID              string          `json:"_id"`
	// SupportingFacts is a list of untyped JSON arrays; elements are left as
	// interface{} so mixed value types decode without error.
	// NOTE(review): presumably [title, sentence index] pairs — confirm against the dataset.
	SupportingFacts [][]interface{} `json:"supporting_facts"`
	// Context is a list of untyped JSON arrays, decoded the same way as
	// SupportingFacts.
	// NOTE(review): presumably [title, sentences] pairs — confirm against the dataset.
	Context         [][]interface{} `json:"context"`
	// Question is read from the "question" key.
	Question        string          `json:"question"`
	// Answer is read from the "answer" key.
	Answer          string          `json:"answer"`
	// Type is read from the "type" key.
	Type            string          `json:"type"`
	// Level is read from the "level" key.
	Level           string          `json:"level"`
}

func LoadHotpotQA

func LoadHotpotQA() ([]HotPotQAExample, error)

type OolongTask added in v0.80.0

// OolongTask is a single OOLONG benchmark task. Field names map to the JSON
// keys named in their tags.
type OolongTask struct {
	// ID is read from the "id" key; the type's UnmarshalJSON accepts rows
	// where id is either a string or a number.
	ID                string `json:"id"`
	// ContextLen is read from the "context_len" key.
	ContextLen        int    `json:"context_len"`
	// Dataset is read from the "dataset" key.
	Dataset           string `json:"dataset"`
	// ContextWindowText is read from the "context_window_text" key.
	ContextWindowText string `json:"context_window_text"`
	// Question is read from the "question" key.
	Question          string `json:"question"`
	// TaskGroup is read from the "task_group" key.
	TaskGroup         string `json:"task_group"`
	// Task is read from the "task" key.
	Task              string `json:"task"`
	// Answer is read from the "answer" key.
	Answer            string `json:"answer"`
	// AnswerType is read from the "answer_type" key.
	AnswerType        string `json:"answer_type"`

	// TaskID and Context are read from the "task_id" and "context" keys.
	// NOTE(review): presumably the alternate-schema counterparts of ID and
	// ContextWindowText that Normalize folds into the canonical fields —
	// confirm against Normalize.
	TaskID  string `json:"task_id"`
	Context string `json:"context"`
}

OolongTask represents a single OOLONG benchmark task. It supports both the HuggingFace schema and the local example schema.

func FetchOolongTasksFromHuggingFace added in v0.80.0

func FetchOolongTasksFromHuggingFace(limit int) ([]OolongTask, error)

FetchOolongTasksFromHuggingFace loads OOLONG validation rows from the public datasets server.

func FetchOolongTasksFromHuggingFaceRange added in v0.80.0

func FetchOolongTasksFromHuggingFaceRange(offset, limit int) ([]OolongTask, error)

FetchOolongTasksFromHuggingFaceRange loads a deterministic slice of OOLONG validation rows.

func LoadOolongTasksFromFile added in v0.80.0

func LoadOolongTasksFromFile(path string) ([]OolongTask, error)

LoadOolongTasksFromFile loads OOLONG tasks from a JSON file.

func SampleOolongTasks added in v0.80.0

func SampleOolongTasks() []OolongTask

SampleOolongTasks returns embedded OOLONG-style tasks for local smoke testing.

func SliceOolongTasks added in v0.80.0

func SliceOolongTasks(tasks []OolongTask, offset, limit int) []OolongTask

SliceOolongTasks returns a deterministic slice from a task set.

func (OolongTask) Normalize added in v0.80.0

func (t OolongTask) Normalize() OolongTask

Normalize fills the canonical fields from alternate schema variants.

func (*OolongTask) UnmarshalJSON added in v0.80.0

func (t *OolongTask) UnmarshalJSON(data []byte) error

UnmarshalJSON accepts HuggingFace rows where id may be a string or a number.

type SimpleDataset added in v0.40.0

type SimpleDataset struct {
	// contains filtered or unexported fields
}

SimpleDataset implements core.Dataset interface for testing and examples.

func NewSimpleDataset added in v0.40.0

func NewSimpleDataset(examples []core.Example) *SimpleDataset

NewSimpleDataset creates a new SimpleDataset with the given examples.

func (*SimpleDataset) Next added in v0.40.0

func (sd *SimpleDataset) Next() (core.Example, bool)

Next returns the next example in the dataset.

func (*SimpleDataset) Reset added in v0.40.0

func (sd *SimpleDataset) Reset()

Reset resets the dataset iterator to the beginning.

type TBLiteTask added in v0.80.0

// TBLiteTask is a single OpenThoughts-TBLite benchmark task. Field names map
// to the JSON keys named in their tags.
type TBLiteTask struct {
	// TaskName is read from the "task_name" key.
	TaskName        string   `json:"task_name"`
	// Instruction is read from the "instruction" key.
	Instruction     string   `json:"instruction"`
	// DockerImage is read from the "docker_image" key.
	DockerImage     string   `json:"docker_image"`
	// Category is read from the "category" key.
	Category        string   `json:"category"`
	// Difficulty is read from the "difficulty" key.
	Difficulty      string   `json:"difficulty"`
	// Tags is read from the "tags" key and omitted from output when empty;
	// the type's UnmarshalJSON also accepts tags supplied as a JSON string.
	Tags            []string `json:"tags,omitempty"`
	// AgentTimeoutSec and TestTimeoutSec are read from "agent_timeout_sec"
	// and "test_timeout_sec"; UnmarshalJSON accepts numbers or strings.
	// NOTE(review): unit is presumably seconds, per the field names — confirm.
	AgentTimeoutSec int      `json:"agent_timeout_sec"`
	TestTimeoutSec  int      `json:"test_timeout_sec"`
	// EnvironmentTar holds the base64 environment tarball payload; decode it
	// with DecodeEnvironmentArchive.
	EnvironmentTar  string   `json:"environment_tar"`
	// TestsTar holds the base64 tests tarball payload; decode it with
	// DecodeTestsArchive.
	TestsTar        string   `json:"tests_tar"`
	// TestScript is read from the "test_sh" key (the JSON key differs from
	// the Go field name).
	TestScript      string   `json:"test_sh"`
}

TBLiteTask represents a single OpenThoughts-TBLite benchmark task. It supports both HuggingFace row payloads and local JSON fixtures.

func FetchTBLiteTasksByNamesContext added in v0.81.0

func FetchTBLiteTasksByNamesContext(ctx context.Context, split string, taskNames []string) ([]TBLiteTask, error)

FetchTBLiteTasksByNamesContext resolves named tasks from the datasets server while preserving the requested order.

func FetchTBLiteTasksFromHuggingFace added in v0.80.0

func FetchTBLiteTasksFromHuggingFace(limit int) ([]TBLiteTask, error)

FetchTBLiteTasksFromHuggingFace loads TBLite rows from the public datasets server.

func FetchTBLiteTasksFromHuggingFaceContext added in v0.80.0

func FetchTBLiteTasksFromHuggingFaceContext(ctx context.Context, limit int) ([]TBLiteTask, error)

FetchTBLiteTasksFromHuggingFaceContext loads TBLite rows from the public datasets server.

func FetchTBLiteTasksFromHuggingFaceRange added in v0.80.0

func FetchTBLiteTasksFromHuggingFaceRange(split string, offset, limit int) ([]TBLiteTask, error)

FetchTBLiteTasksFromHuggingFaceRange loads a deterministic slice of TBLite rows.

func FetchTBLiteTasksFromHuggingFaceRangeContext added in v0.80.0

func FetchTBLiteTasksFromHuggingFaceRangeContext(ctx context.Context, split string, offset, limit int) ([]TBLiteTask, error)

FetchTBLiteTasksFromHuggingFaceRangeContext loads a deterministic slice of TBLite rows.

func LoadTBLiteTasksFromFile added in v0.80.0

func LoadTBLiteTasksFromFile(path string) ([]TBLiteTask, error)

LoadTBLiteTasksFromFile loads TBLite tasks from a local JSON file.

func SliceTBLiteTasks added in v0.80.0

func SliceTBLiteTasks(tasks []TBLiteTask, offset, limit int) []TBLiteTask

SliceTBLiteTasks returns a deterministic slice from a task set.

func (TBLiteTask) DecodeEnvironmentArchive added in v0.80.0

func (t TBLiteTask) DecodeEnvironmentArchive() ([]byte, error)

DecodeEnvironmentArchive decodes the base64 environment tarball payload.

func (TBLiteTask) DecodeTestsArchive added in v0.80.0

func (t TBLiteTask) DecodeTestsArchive() ([]byte, error)

DecodeTestsArchive decodes the base64 tests tarball payload.

func (TBLiteTask) Normalize added in v0.80.0

func (t TBLiteTask) Normalize() TBLiteTask

Normalize fills defaults used by the benchmark harness.

func (*TBLiteTask) UnmarshalJSON added in v0.80.0

func (t *TBLiteTask) UnmarshalJSON(data []byte) error

UnmarshalJSON accepts HuggingFace rows where tags may be a JSON string and timeout fields may be numbers or strings.

type TBLiteTaskSelection added in v0.81.0

// TBLiteTaskSelection describes a curated set of TBLite tasks. Every field is
// tagged omitempty, so zero-valued fields are left out when marshalled.
type TBLiteTaskSelection struct {
	// Label is read from the "label" key.
	Label     string       `json:"label,omitempty"`
	// Split is read from the "split" key.
	Split     string       `json:"split,omitempty"`
	// TaskNames is read from the "task_names" key and lists tasks by name.
	TaskNames []string     `json:"task_names,omitempty"`
	// Tasks is read from the "tasks" key and embeds full task payloads.
	Tasks     []TBLiteTask `json:"tasks,omitempty"`
}

TBLiteTaskSelection describes a curated benchmark slice. It can reference existing HuggingFace tasks by name and/or embed full local task payloads.

func LoadTBLiteTaskSelectionFromFile added in v0.81.0

func LoadTBLiteTaskSelectionFromFile(path string) (*TBLiteTaskSelection, error)

LoadTBLiteTaskSelectionFromFile loads a curated TBLite benchmark manifest. Supported JSON shapes:

  - an array of task names: ["task-a", "task-b"]
  - an array of full task objects: [{...full task...}, {...full task...}]
  - a selection object: {"label":"...", "split":"train", "task_names":[...], "tasks":[...]}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL