datasets

package
v0.80.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 16, 2026 License: MIT Imports: 18 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CheckOolongAnswer added in v0.80.0

func CheckOolongAnswer(expected, actual string) bool

CheckOolongAnswer applies the same answer-matching logic used by the benchmark examples.

func EnsureDataset

func EnsureDataset(datasetName string) (string, error)

Types

type GSM8KExample

// GSM8KExample is a single record of the GSM8K dataset, the element type
// returned by LoadGSM8K. Both fields are plain strings decoded from the JSON
// keys "question" and "answer".
type GSM8KExample struct {
	Question string `json:"question"`
	Answer   string `json:"answer"`
}

func LoadGSM8K

func LoadGSM8K() ([]GSM8KExample, error)

type HotPotQAExample

// HotPotQAExample is a single record of the HotpotQA dataset, the element
// type returned by LoadHotpotQA.
//
// ID is decoded from the JSON key "_id" (note the leading underscore).
// SupportingFacts and Context are decoded as untyped nested arrays, so the
// inner elements arrive as interface{} values and must be type-asserted by
// the caller.
// NOTE(review): the layout of the inner arrays is not visible from this
// declaration — confirm against the dataset before indexing into them.
type HotPotQAExample struct {
	ID              string          `json:"_id"`
	SupportingFacts [][]interface{} `json:"supporting_facts"`
	Context         [][]interface{} `json:"context"`
	Question        string          `json:"question"`
	Answer          string          `json:"answer"`
	Type            string          `json:"type"`
	Level           string          `json:"level"`
}

func LoadHotpotQA

func LoadHotpotQA() ([]HotPotQAExample, error)

type OolongTask added in v0.80.0

// OolongTask is a single OOLONG benchmark task. The struct carries two groups
// of fields, separated by a blank line, so that rows from more than one JSON
// schema can be decoded into the same type; Normalize is the method that
// fills the canonical fields from the alternate ones.
type OolongTask struct {
	ID                string `json:"id"`
	ContextLen        int    `json:"context_len"`
	Dataset           string `json:"dataset"`
	ContextWindowText string `json:"context_window_text"`
	Question          string `json:"question"`
	TaskGroup         string `json:"task_group"`
	Task              string `json:"task"`
	Answer            string `json:"answer"`
	AnswerType        string `json:"answer_type"`

	// NOTE(review): TaskID and Context look like the alternate-schema
	// counterparts of ID and ContextWindowText — confirm against Normalize.
	TaskID  string `json:"task_id"`
	Context string `json:"context"`
}

OolongTask represents a single OOLONG benchmark task. It supports both the HuggingFace schema and the local example schema.

func FetchOolongTasksFromHuggingFace added in v0.80.0

func FetchOolongTasksFromHuggingFace(limit int) ([]OolongTask, error)

FetchOolongTasksFromHuggingFace loads OOLONG validation rows from the public datasets server.

func FetchOolongTasksFromHuggingFaceRange added in v0.80.0

func FetchOolongTasksFromHuggingFaceRange(offset, limit int) ([]OolongTask, error)

FetchOolongTasksFromHuggingFaceRange loads a deterministic slice of OOLONG validation rows.

func LoadOolongTasksFromFile added in v0.80.0

func LoadOolongTasksFromFile(path string) ([]OolongTask, error)

LoadOolongTasksFromFile loads OOLONG tasks from a JSON file.

func SampleOolongTasks added in v0.80.0

func SampleOolongTasks() []OolongTask

SampleOolongTasks returns embedded OOLONG-style tasks for local smoke testing.

func SliceOolongTasks added in v0.80.0

func SliceOolongTasks(tasks []OolongTask, offset, limit int) []OolongTask

SliceOolongTasks returns a deterministic slice from a task set.

func (OolongTask) Normalize added in v0.80.0

func (t OolongTask) Normalize() OolongTask

Normalize fills the canonical fields from alternate schema variants.

func (*OolongTask) UnmarshalJSON added in v0.80.0

func (t *OolongTask) UnmarshalJSON(data []byte) error

UnmarshalJSON accepts HuggingFace rows where id may be a string or a number.

type SimpleDataset added in v0.40.0

// SimpleDataset exposes no public fields: construct one with NewSimpleDataset
// and iterate over it with Next and Reset.
type SimpleDataset struct {
	// contains filtered or unexported fields
}

SimpleDataset implements the core.Dataset interface for testing and examples.

func NewSimpleDataset added in v0.40.0

func NewSimpleDataset(examples []core.Example) *SimpleDataset

NewSimpleDataset creates a new SimpleDataset with the given examples.

func (*SimpleDataset) Next added in v0.40.0

func (sd *SimpleDataset) Next() (core.Example, bool)

Next returns the next example in the dataset.

func (*SimpleDataset) Reset added in v0.40.0

func (sd *SimpleDataset) Reset()

Reset resets the dataset iterator to the beginning.

type TBLiteTask added in v0.80.0

// TBLiteTask is a single OpenThoughts-TBLite benchmark task.
//
// EnvironmentTar and TestsTar hold base64-encoded tarball payloads; use
// DecodeEnvironmentArchive and DecodeTestsArchive to obtain the raw bytes.
// TestScript is decoded from the JSON key "test_sh". Tags is omitted from
// encoded output when empty.
// NOTE(review): AgentTimeoutSec and TestTimeoutSec are presumably expressed
// in seconds, going by the field names — confirm against the harness.
type TBLiteTask struct {
	TaskName        string   `json:"task_name"`
	Instruction     string   `json:"instruction"`
	DockerImage     string   `json:"docker_image"`
	Category        string   `json:"category"`
	Difficulty      string   `json:"difficulty"`
	Tags            []string `json:"tags,omitempty"`
	AgentTimeoutSec int      `json:"agent_timeout_sec"`
	TestTimeoutSec  int      `json:"test_timeout_sec"`
	EnvironmentTar  string   `json:"environment_tar"`
	TestsTar        string   `json:"tests_tar"`
	TestScript      string   `json:"test_sh"`
}

TBLiteTask represents a single OpenThoughts-TBLite benchmark task. It supports both HuggingFace row payloads and local JSON fixtures.

func FetchTBLiteTasksFromHuggingFace added in v0.80.0

func FetchTBLiteTasksFromHuggingFace(limit int) ([]TBLiteTask, error)

FetchTBLiteTasksFromHuggingFace loads TBLite rows from the public datasets server.

func FetchTBLiteTasksFromHuggingFaceContext added in v0.80.0

func FetchTBLiteTasksFromHuggingFaceContext(ctx context.Context, limit int) ([]TBLiteTask, error)

FetchTBLiteTasksFromHuggingFaceContext is the variant of FetchTBLiteTasksFromHuggingFace that accepts a context.Context; it loads TBLite rows from the public datasets server.

func FetchTBLiteTasksFromHuggingFaceRange added in v0.80.0

func FetchTBLiteTasksFromHuggingFaceRange(split string, offset, limit int) ([]TBLiteTask, error)

FetchTBLiteTasksFromHuggingFaceRange loads a deterministic slice of TBLite rows.

func FetchTBLiteTasksFromHuggingFaceRangeContext added in v0.80.0

func FetchTBLiteTasksFromHuggingFaceRangeContext(ctx context.Context, split string, offset, limit int) ([]TBLiteTask, error)

FetchTBLiteTasksFromHuggingFaceRangeContext is the variant of FetchTBLiteTasksFromHuggingFaceRange that accepts a context.Context; it loads a deterministic slice of TBLite rows.

func LoadTBLiteTasksFromFile added in v0.80.0

func LoadTBLiteTasksFromFile(path string) ([]TBLiteTask, error)

LoadTBLiteTasksFromFile loads TBLite tasks from a local JSON file.

func SliceTBLiteTasks added in v0.80.0

func SliceTBLiteTasks(tasks []TBLiteTask, offset, limit int) []TBLiteTask

SliceTBLiteTasks returns a deterministic slice from a task set.

func (TBLiteTask) DecodeEnvironmentArchive added in v0.80.0

func (t TBLiteTask) DecodeEnvironmentArchive() ([]byte, error)

DecodeEnvironmentArchive decodes the base64 environment tarball payload.

func (TBLiteTask) DecodeTestsArchive added in v0.80.0

func (t TBLiteTask) DecodeTestsArchive() ([]byte, error)

DecodeTestsArchive decodes the base64 tests tarball payload.

func (TBLiteTask) Normalize added in v0.80.0

func (t TBLiteTask) Normalize() TBLiteTask

Normalize fills defaults used by the benchmark harness.

func (*TBLiteTask) UnmarshalJSON added in v0.80.0

func (t *TBLiteTask) UnmarshalJSON(data []byte) error

UnmarshalJSON accepts HuggingFace rows where tags may be a JSON string and timeout fields may be numbers or strings.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL