lerobot

package
v1.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 5, 2026 License: MIT Imports: 12 Imported by: 0

Documentation

Overview

Package lerobot implements the HuggingFace LeRobot dataset writer and inspector for on-disk formats v2.1 and v3.0.

Supported versions

  • V21 (codebase v2.1): per-episode parquet, optional mp4 videos or embedded images
  • V30 (codebase v3.0): chunked parquet, meta/tasks.parquet, chunked videos

Typical workflows

Parallel ingest (recommended for robotics pipelines):

  • NewStagingWriter per episode under a shared staging root (ep_NNNNNN dirs)
  • Merge to produce the final dataset (CLI: lerobot-go create)

Serial ingest:

  • Create opens Root/_staging, AddFrame / SaveEpisode per episode
  • Finalize runs Merge into Root

Validation:

  • NewInspector().Validate or ValidateStrict on a dataset directory

CLI commands validate, convert, create, and merge call the same internal logic. See docs/API.md and examples/ for runnable code.

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func Merge

func Merge(ctx context.Context, cfg MergeConfig) error

Merge finalizes completed staging episodes into the official on-disk layout.

func RunEpisodeJobs

func RunEpisodeJobs(ctx context.Context, maxWorkers int, jobs []func(context.Context) error) error

RunEpisodeJobs executes staging jobs with bounded concurrency.

Types

type CreateConfig

// CreateConfig is the configuration accepted by Create for serial dataset
// creation: episodes are staged under Root/_staging and merged on Finalize.
type CreateConfig struct {
	// Version selects the on-disk layout to write (V21 or V30).
	Version   Version
	RepoID    string
	// Root is the dataset directory; staging happens in Root/_staging.
	Root      string
	TempRoot  string
	// NOTE(review): FPS is presumably the recording frame rate — confirm.
	FPS       int
	RobotType string
	// Features maps a feature name to its column spec (meta/info.json and
	// parquet schema).
	Features  map[string]FeatureSpec
	// NOTE(review): UseVideos, VCodec and CRF presumably control mp4 encoding
	// (codec name and constant rate factor) — confirm against the encoder code.
	UseVideos bool
	VCodec    string
	CRF       int
	// FFmpeg selects the ffmpeg/ffprobe binaries; empty paths use PATH lookup.
	FFmpeg    FFmpegConfig
	Streaming bool
	// Stats controls image/video episode statistics during finalize.
	Stats     StatsMode
}

CreateConfig configures Create, which builds a dataset serially: staging under Root/_staging, then merge on Finalize.

type Dataset

// Dataset is the serial writer returned by Create; episodes are staged under
// Root/_staging until Finalize.
type Dataset interface {
	// AddFrame records one Frame (timestep). The documented workflow is
	// AddFrame per frame, then SaveEpisode per episode.
	AddFrame(ctx context.Context, frame Frame) error
	// NOTE(review): SaveEpisode presumably closes out the episode built from
	// the preceding AddFrame calls — confirm.
	SaveEpisode(ctx context.Context) error
	// Finalize runs Merge into Root.
	Finalize(ctx context.Context) error
	// NOTE(review): Root presumably returns the dataset root directory
	// (CreateConfig.Root) — confirm.
	Root() string
}

Dataset is a serial writer: episodes are staged under Root/_staging until Finalize.

func Create

func Create(ctx context.Context, cfg CreateConfig) (Dataset, error)

Create starts a serial dataset under cfg.Root (staging in Root/_staging).

type EpisodeManifest

// EpisodeManifest summarizes a completed staging episode; it is the value
// returned by StagingWriter.SaveEpisode.
type EpisodeManifest struct {
	EpisodeIndex int
	// NOTE(review): Length is presumably the number of frames in the episode — confirm.
	Length       int
	// NOTE(review): Tasks presumably lists the Frame.Task strings seen in the episode — confirm.
	Tasks        []string
	// NOTE(review): Dir is presumably the episode's ep_NNNNNN staging directory — confirm.
	Dir          string
}

EpisodeManifest summarizes a completed staging episode.

type FFmpegConfig

// FFmpegConfig selects the ffmpeg and ffprobe binaries.
type FFmpegConfig struct {
	// FFmpegPath is the ffmpeg binary; an empty path uses PATH lookup.
	FFmpegPath  string
	// FFprobePath is the ffprobe binary; an empty path uses PATH lookup.
	FFprobePath string
}

FFmpegConfig selects ffmpeg and ffprobe binaries (empty paths use PATH lookup).

type FeatureSpec

type FeatureSpec = meta.FeatureSpec

FeatureSpec describes one column in meta/info.json and parquet schema.

type Frame

// Frame is one timestep of an episode.
type Frame struct {
	// Task maps to the task_index column.
	Task   string
	// Values holds the feature payloads, keyed by feature name
	// (e.g. "observation.state" -> []float32{1, 2}).
	Values map[string]any
}

Frame is one timestep. Task maps to the task_index column; Values holds the feature payloads.

type Inspector

// Inspector validates LeRobot dataset directories and compares schemas.
type Inspector interface {
	// Validate checks the dataset directory at root. For an empty directory it
	// returns a nil error and a report with OK == false.
	Validate(ctx context.Context, root string) (*ValidationReport, error)
	// NOTE(review): ValidateStrict presumably applies stricter checks than
	// Validate; the exact differences are not visible here — confirm.
	ValidateStrict(ctx context.Context, root string) (*ValidationReport, error)
	// SchemaDiff compares meta/info.json between the golden and candidate
	// datasets.
	SchemaDiff(ctx context.Context, goldenRoot, candidateRoot string) (*SchemaDiffReport, error)
}

Inspector validates LeRobot datasets and compares schemas.

func NewInspector

func NewInspector() Inspector

NewInspector returns a dataset layout inspector.

Example
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/ioai-tech/lerobot-go/lerobot"
)

// main validates a freshly created, empty temporary directory and prints the
// report's OK flag.
func main() {
	// An empty directory is not a valid dataset, so the inspector is expected to
	// report problems through the ValidationReport rather than panic.
	dir, mkErr := os.MkdirTemp("", "lerobot-insp-*")
	if mkErr != nil {
		panic(mkErr)
	}
	// Cleanup is best-effort; a leftover temp dir does not affect the result.
	defer func() { _ = os.RemoveAll(dir) }()

	report, valErr := lerobot.NewInspector().Validate(context.Background(), dir)
	if valErr != nil {
		panic(valErr)
	}
	fmt.Println(report.OK)
}
Output:
false

type MergeConfig

// MergeConfig is the configuration accepted by Merge, which finalizes completed
// staging episodes into the on-disk dataset layout.
type MergeConfig struct {
	// Version selects the output layout (V21 or V30).
	Version     Version
	// StagingRoot is the shared staging root containing the ep_NNNNNN
	// episode directories.
	StagingRoot string
	// OutputRoot is the directory the final dataset is written to.
	OutputRoot  string
	RepoID      string
	RobotType   string
	// NOTE(review): FPS is presumably the dataset frame rate — confirm.
	FPS         int
	// Features maps a feature name to its column spec (meta/info.json and
	// parquet schema).
	Features    map[string]FeatureSpec
	// FFmpeg selects the ffmpeg/ffprobe binaries; empty paths use PATH lookup.
	FFmpeg      FFmpegConfig
	// NOTE(review): MaxWorkers presumably bounds merge concurrency; zero-value
	// behavior is not visible here — confirm.
	MaxWorkers  int
	// Stats controls image/video episode statistics during finalize.
	Stats       StatsMode
}

MergeConfig configures Merge, which finalizes completed staging episodes into the on-disk dataset layout.

type SchemaDiffReport

// SchemaDiffReport is the result of Inspector.SchemaDiff, which compares
// meta/info.json between two datasets.
type SchemaDiffReport struct {
	// NOTE(review): OK is presumably true when no diffs or errors were found — confirm.
	OK     bool
	// NOTE(review): Diffs presumably holds one human-readable entry per schema difference — confirm.
	Diffs  []string
	Errors []string
}

SchemaDiffReport is the result of comparing meta/info.json between two datasets.

type StagingConfig

// StagingConfig is the configuration accepted by NewStagingWriter for writing
// one episode directory (ep_NNNNNN) during parallel ingestion.
type StagingConfig struct {
	// Version selects the on-disk layout (V21 or V30).
	Version   Version
	// Dir is the episode's staging directory, e.g. <stagingRoot>/ep_000000.
	Dir       string
	// Episode is the episode index.
	// NOTE(review): presumably it must match the NNNNNN suffix of Dir — confirm.
	Episode   int
	TempRoot  string
	// NOTE(review): FPS is presumably the recording frame rate — confirm.
	FPS       int
	RobotType string
	// Features maps a feature name to its column spec (meta/info.json and
	// parquet schema).
	Features  map[string]FeatureSpec
	// NOTE(review): UseVideos, VCodec and CRF presumably control mp4 encoding
	// (codec name and constant rate factor) — confirm against the encoder code.
	UseVideos bool
	VCodec    string
	CRF       int
	// FFmpeg selects the ffmpeg/ffprobe binaries; empty paths use PATH lookup.
	FFmpeg    FFmpegConfig
	Streaming bool
	// Stats controls image/video episode statistics during finalize.
	Stats     StatsMode
}

StagingConfig configures a StagingWriter, which writes one episode directory (ep_NNNNNN) for parallel ingestion.

type StagingWriter

// StagingWriter records one episode into a single ep_NNNNNN directory.
type StagingWriter interface {
	// AddFrame records one Frame (timestep) of the episode.
	AddFrame(ctx context.Context, frame Frame) error
	// SaveEpisode completes the episode and returns its manifest.
	SaveEpisode(ctx context.Context) (*EpisodeManifest, error)
	// NOTE(review): Close presumably releases the writer's resources; whether
	// it must be called after SaveEpisode is not visible here — confirm.
	Close() error
}

StagingWriter records one episode into a single ep_NNNNNN directory.

func NewStagingWriter

func NewStagingWriter(ctx context.Context, cfg StagingConfig) (StagingWriter, error)

NewStagingWriter opens a per-episode staging directory for parallel ingestion.

Example (Merge)
package main

import (
	"context"
	"fmt"
	"os"
	"path/filepath"

	"github.com/ioai-tech/lerobot-go/lerobot"
)

// main stages a single one-frame episode, merges the staging root into a v3.0
// dataset, and prints whether the merged dataset validates.
//
// Every step's error is checked: a failure anywhere aborts with that step's
// own error instead of surfacing later as a nil-pointer dereference on the
// validation report or as a misleading "false".
func main() {
	ctx := context.Background()
	root, err := os.MkdirTemp("", "lerobot-example-*")
	if err != nil {
		panic(err)
	}
	// Cleanup is best-effort; a leftover temp dir does not affect the result.
	defer func() { _ = os.RemoveAll(root) }()

	stagingRoot := filepath.Join(root, "staging")
	out := filepath.Join(root, "out")
	features := map[string]lerobot.FeatureSpec{
		"observation.state": {DType: "float32", Shape: []int{2}},
		"action":            {DType: "float32", Shape: []int{2}},
	}

	w, err := lerobot.NewStagingWriter(ctx, lerobot.StagingConfig{
		Version: lerobot.V30, Dir: filepath.Join(stagingRoot, "ep_000000"),
		Episode: 0, FPS: 10, Features: features,
	})
	if err != nil {
		panic(err)
	}
	// Release the writer on every exit path. The error is ignored on purpose:
	// SaveEpisode's error is already checked below, and this deferred call runs
	// before the temp-dir removal registered above.
	defer func() { _ = w.Close() }()

	if err := w.AddFrame(ctx, lerobot.Frame{
		Task: "demo",
		Values: map[string]any{
			"observation.state": []float32{1, 2},
			"action":            []float32{0, 0},
		},
	}); err != nil {
		panic(err)
	}
	if _, err := w.SaveEpisode(ctx); err != nil {
		panic(err)
	}
	if err := lerobot.Merge(ctx, lerobot.MergeConfig{
		Version: lerobot.V30, StagingRoot: stagingRoot, OutputRoot: out,
		FPS: 10, Features: features,
	}); err != nil {
		panic(err)
	}

	insp := lerobot.NewInspector()
	report, err := insp.Validate(ctx, out)
	if err != nil {
		panic(err)
	}
	fmt.Println(report.OK)
}
Output:
true

type StatsMode

type StatsMode int

StatsMode controls image/video episode statistics during finalize.

const (
	// StatsSampled uses the official subsampling heuristic (default).
	// It is the zero value of StatsMode.
	StatsSampled StatsMode = iota
	// StatsFull scans every frame when computing image/video stats.
	StatsFull
)

type ValidationReport

// ValidationReport is the result of layout validation, as returned by
// Inspector.Validate and Inspector.ValidateStrict.
type ValidationReport struct {
	// OK is false when the directory is not a valid dataset (for example, an
	// empty directory).
	OK       bool
	Errors   []string
	Warnings []string
	// NOTE(review): Info is presumably the parsed meta/info.json — confirm.
	Info     meta.DatasetInfo
	Summary  string
	// NOTE(review): Version is presumably the detected codebase version string — confirm.
	Version  string
}

ValidationReport is the result of layout validation.

type Version

type Version int

Version identifies a LeRobot on-disk layout (v2.1 or v3.0).

const (
	// VersionUnset is the zero value of Version.
	VersionUnset Version = 0
	// V21 is LeRobot codebase v2.1 (per-episode parquet, optional mp4 videos).
	V21 Version = 21
	// V30 is LeRobot codebase v3.0 (chunked parquet, tasks.parquet).
	V30 Version = 30
)

func (Version) String

func (v Version) String() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL