openai

package

v0.0.39 Latest Latest Go to latest Published: Jan 27, 2026 License: Apache-2.0 Imports: 14 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/mutablelogic/go-whisper

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func LanguageCode(language string) (string, string)
type ChunkingStrategy
- func (c ChunkingStrategy) String() string
type Client
- func New(apikey string, opts ...client.ClientOpt) (*Client, error)
- func (c *Client) Transcribe(ctx context.Context, req TranscriptionRequest, streamfn func(schema.Event)) (*TranscriptionResponse, error)
- func (c *Client) Translate(ctx context.Context, req TranslationRequest) (*TranscriptionResponse, error)
type TranscriptionRequest
- func (s TranscriptionRequest) String() string
type TranscriptionResponse
type TranscriptionSegment
- func (seg *TranscriptionSegment) IdAsInt32() int32
type TranscriptionUsage
type TranslationRequest
- func (s TranslationRequest) String() string

Constants ¶

View Source

// OpenAI API base URL and the audio endpoint paths.
// The paths carry no leading slash and Endpoint ends in one, so they are
// presumably appended to Endpoint to form the full request URL (confirm in the
// client implementation).
const (
	Endpoint       = "https://api.openai.com/v1/"
	TranscribePath = "audio/transcriptions" // Endpoint for transcription
	TranslatePath  = "audio/translations"   // Endpoint for translation
)

View Source

// Response format identifiers. These are the values collected in the Formats
// list and are the kind of value carried by the "response_format" request field.
const (
	FormatJson         = "json"
	FormatVerboseJson  = "verbose_json"
	FormatDiarizedJson = "diarized_json"
	FormatText         = "text"
	FormatSrt          = "srt"
	FormatVtt          = "vtt"
)

View Source

// Values for the Type field of ChunkingStrategy.
const (
	ChunkingStrategyAuto      = "auto"       // Auto chunking with VAD
	ChunkingStrategyServerVAD = "server_vad" // Server-side VAD chunking (required for diarization)
)

Variables ¶

View Source

// Lists of supported model names and response formats.
var (
	// Supported models for transcription and translation
	Models = []string{
		"whisper-1",
		"gpt-4o-mini-transcribe",
		"gpt-4o-mini-transcribe-2025-12-15",
		"gpt-4o-transcribe",
	}
	// Supported models for diarization
	DiarizeModels = []string{
		"gpt-4o-transcribe-diarize",
	}

	// Supported response formats (built from the Format* constants)
	Formats = []string{
		FormatText, FormatJson, FormatVerboseJson, FormatDiarizedJson, FormatSrt, FormatVtt,
	}
)

Functions ¶

func LanguageCode ¶

func LanguageCode(language string) (string, string)

LanguageCode returns the language name and the two-letter OpenAI language code for the given language, or an empty string if the language is not recognized.

Types ¶

type ChunkingStrategy ¶ added in v0.0.39

// ChunkingStrategy controls how the audio is cut into chunks for diarization.
//
// Only Type is always serialized; the remaining fields are pointers tagged
// with omitempty, so a nil field is left out of the JSON encoding.
// Note that VadThreshold is serialized under the key "threshold".
type ChunkingStrategy struct {
	Type              string   `json:"type"`                          // "auto" or "server_vad"
	VadThreshold      *float64 `json:"threshold,omitempty"`           // VAD threshold (0.0-1.0)
	PrefixPaddingMs   *int     `json:"prefix_padding_ms,omitempty"`   // Padding before speech (ms)
	SilenceDurationMs *int     `json:"silence_duration_ms,omitempty"` // Silence duration to end segment (ms)
}

ChunkingStrategy controls how the audio is cut into chunks for diarization

func (ChunkingStrategy) String ¶ added in v0.0.39

func (c ChunkingStrategy) String() string

type Client ¶

// Client is the OpenAI audio API client created by New.
// It embeds *client.Client, so that type's exported methods are promoted onto
// Client alongside Transcribe and Translate.
type Client struct {
	*client.Client
}

func New ¶

func New(apikey string, opts ...client.ClientOpt) (*Client, error)

New creates a new client, with the OpenAI API key

func (*Client) Transcribe ¶

func (c *Client) Transcribe(ctx context.Context, req TranscriptionRequest, streamfn func(schema.Event)) (*TranscriptionResponse, error)

Transcribe performs a transcription request in the language of the speech. If streamfn is provided, streaming mode is enabled and events will be passed to the callback.

func (*Client) Translate ¶

func (c *Client) Translate(ctx context.Context, req TranslationRequest) (*TranscriptionResponse, error)

Translate performs a translation request and returns the result in English

type TranscriptionRequest ¶

// TranscriptionRequest is the request passed to Client.Transcribe.
// It embeds TranslationRequest (model, file, prompt, response format and
// temperature) and adds the options that apply to transcription only.
// Every added field is tagged omitempty, so unset options are not serialized.
type TranscriptionRequest struct {
	TranslationRequest
	Include                []string          `json:"include,omitempty"`                  // logprobs
	Language               *string           `json:"language,omitempty"`                 // Transcription only en, es, fr, etc.
	Timestamps             []string          `json:"timestamp_granularities,omitempty"`  // combination of word, segment
	ChunkingStrategy       *ChunkingStrategy `json:"chunking_strategy,omitempty"`        // "auto" or server_vad object
	KnownSpeakerNames      []string          `json:"known_speaker_names,omitempty"`      // Speaker names for diarization (up to 4)
	KnownSpeakerReferences []string          `json:"known_speaker_references,omitempty"` // Audio samples as data URLs (2-10 seconds each)
}

func (TranscriptionRequest) String ¶

func (s TranscriptionRequest) String() string

type TranscriptionResponse ¶

// TranscriptionResponse is the result type returned by both Client.Transcribe
// and Client.Translate.
// All fields are tagged omitempty. The Segment field holds the list of
// segments and is serialized under the plural key "segments".
type TranscriptionResponse struct {
	Task     string                  `json:"task,omitempty"`
	Language string                  `json:"language,omitempty"`
	Duration schema.Timestamp        `json:"duration,omitempty"`
	Text     string                  `json:"text,omitempty"`
	Segment  []*TranscriptionSegment `json:"segments,omitempty" writer:",width:40,wrap"`
	Usage    *TranscriptionUsage     `json:"usage,omitempty"`
}

func (*TranscriptionResponse) Segments ¶

func (s *TranscriptionResponse) Segments() *schema.Transcription

func (TranscriptionResponse) String ¶

func (s TranscriptionResponse) String() string

func (*TranscriptionResponse) Unmarshal ¶

func (s *TranscriptionResponse) Unmarshal(header http.Header, r io.Reader) error

type TranscriptionSegment ¶

// TranscriptionSegment is one entry of TranscriptionResponse.Segment.
// The same struct is used for verbose_json and diarized_json responses, which
// is why Id is typed as any; use IdAsInt32 to read the ID as an int32
// regardless of which form was received.
type TranscriptionSegment struct {
	Type             string           `json:"type,omitempty"` // Segment type (e.g., "transcript.text.segment" for diarized)
	Id               any              `json:"id"`             // Segment ID (int32 for verbose_json, string for diarized_json)
	Seek             uint32           `json:"seek,omitempty"`
	Start            schema.Timestamp `json:"start"`
	End              schema.Timestamp `json:"end"`
	Text             string           `json:"text"`
	Speaker          string           `json:"speaker,omitempty"`           // Speaker label for diarized transcription
	Tokens           []uint32         `json:"tokens,omitempty"`            // Array of token IDs for the text content.
	Temperature      *float64         `json:"temperature,omitempty"`       // Temperature parameter used for generating the segment.
	AvgLogProb       *float64         `json:"avg_logprob,omitempty"`       // Average logprob of the segment. If the value is lower than -1, consider the logprobs failed.
	CompressionRatio *float64         `json:"compression_ratio,omitempty"` // Compression ratio of the segment. If the value is greater than 2.4, consider the compression failed.
	// NOTE(review): a probability cannot exceed 1.0, so the "higher than 1.0"
	// threshold below can never be met as written; the wording looks copied from
	// the upstream API description — confirm the intended threshold.
	NoSpeechProb     *float64         `json:"no_speech_prob,omitempty"`    // Probability of no speech in the segment. If the value is higher than 1.0 and the avg_logprob is below -1, consider this segment silent.
}

func (*TranscriptionSegment) IdAsInt32 ¶ added in v0.0.39

func (seg *TranscriptionSegment) IdAsInt32() int32

IdAsInt32 returns the segment ID as int32, handling both numeric and string IDs

type TranscriptionUsage ¶ added in v0.0.39

// TranscriptionUsage is the usage record carried in the Usage field of
// TranscriptionResponse.
type TranscriptionUsage struct {
	Type    string `json:"type"`    // "duration"
	Seconds int    `json:"seconds"` // Billed duration in seconds
}

type TranslationRequest ¶

// TranslationRequest is the request passed to Client.Translate. It is also
// embedded in TranscriptionRequest, so these fields are common to both calls.
// Model and File are always serialized; the pointer fields are optional and
// omitted from the encoding when nil.
type TranslationRequest struct {
	Model       string         `json:"model"` // whisper-1
	File        multipart.File `json:"file"`
	Prompt      *string        `json:"prompt,omitempty"`
	Format      *string        `json:"response_format,omitempty"` // json, text, srt, verbose_json, or vtt
	Temperature *float64       `json:"temperature,omitempty"`     // 0.0 -> 1.0
}

func (TranslationRequest) String ¶

func (s TranslationRequest) String() string

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL