openai

package
v0.0.39 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 27, 2026 License: Apache-2.0 Imports: 14 Imported by: 0

Documentation

Index

Constants

View Source
// Base URL and request paths for the OpenAI audio API.
const (
	// Endpoint is the base URL of the API (note the trailing slash).
	Endpoint       = "https://api.openai.com/v1/"
	// The paths below have no leading slash; presumably they are resolved
	// relative to Endpoint — confirm against the request code.
	TranscribePath = "audio/transcriptions" // Path for transcription requests
	TranslatePath  = "audio/translations"   // Path for translation requests
)
View Source
// Response format identifiers. Each value is the literal string used to name
// the format; all of them are collected in the Formats variable.
const (
	FormatJson         = "json"
	FormatVerboseJson  = "verbose_json"
	FormatDiarizedJson = "diarized_json"
	FormatText         = "text"
	FormatSrt          = "srt"
	FormatVtt          = "vtt"
)
View Source
// Values for the Type field of ChunkingStrategy.
const (
	ChunkingStrategyAuto      = "auto"       // Auto chunking with VAD
	ChunkingStrategyServerVAD = "server_vad" // Server-side VAD chunking (required for diarization)
)

Variables

View Source
var (
	// Models lists the supported models for transcription and translation.
	Models = []string{
		"whisper-1",
		"gpt-4o-mini-transcribe",
		"gpt-4o-mini-transcribe-2025-12-15",
		"gpt-4o-transcribe",
	}
	// DiarizeModels lists the supported models for diarization.
	DiarizeModels = []string{
		"gpt-4o-transcribe-diarize",
	}

	// Formats lists the supported response formats (every Format* constant).
	Formats = []string{
		FormatText, FormatJson, FormatVerboseJson, FormatDiarizedJson, FormatSrt, FormatVtt,
	}
)

Functions

func LanguageCode

func LanguageCode(language string) (string, string)

LanguageCode returns the language name and the two-letter OpenAI language code for the given language, or an empty string if the language is not recognized.

Types

type ChunkingStrategy added in v0.0.39

// ChunkingStrategy controls how the audio is cut into chunks for diarization.
// Type is required; the pointer fields are optional and are omitted from the
// JSON encoding when nil.
type ChunkingStrategy struct {
	Type              string   `json:"type"`                          // "auto" or "server_vad"
	VadThreshold      *float64 `json:"threshold,omitempty"`           // VAD threshold (0.0-1.0); encoded under the JSON key "threshold"
	PrefixPaddingMs   *int     `json:"prefix_padding_ms,omitempty"`   // Padding before speech (ms)
	SilenceDurationMs *int     `json:"silence_duration_ms,omitempty"` // Silence duration to end segment (ms)
}

ChunkingStrategy controls how the audio is cut into chunks for diarization

func (ChunkingStrategy) String added in v0.0.39

func (c ChunkingStrategy) String() string

type Client

// Client is the OpenAI API client. It embeds *client.Client, so the methods
// of that type are promoted onto Client.
type Client struct {
	*client.Client
}

func New

func New(apikey string, opts ...client.ClientOpt) (*Client, error)

New creates a new client, with the OpenAI API key

func (*Client) Transcribe

func (c *Client) Transcribe(ctx context.Context, req TranscriptionRequest, streamfn func(schema.Event)) (*TranscriptionResponse, error)

Transcribe performs a transcription request in the language of the speech. If streamfn is provided, streaming mode is enabled and events will be passed to the callback.

func (*Client) Translate

Translate performs a transcription request and returns the result in English.

type TranscriptionRequest

// TranscriptionRequest is the request passed to Client.Transcribe. It embeds
// TranslationRequest (model, file, prompt, response format, temperature) and
// adds the fields that only apply to transcription and diarization.
type TranscriptionRequest struct {
	TranslationRequest
	Include                []string          `json:"include,omitempty"`                  // Additional data to include, e.g. logprobs
	Language               *string           `json:"language,omitempty"`                 // Language code, transcription only (en, es, fr, etc.)
	Timestamps             []string          `json:"timestamp_granularities,omitempty"`  // combination of word, segment
	ChunkingStrategy       *ChunkingStrategy `json:"chunking_strategy,omitempty"`        // "auto" or server_vad object
	KnownSpeakerNames      []string          `json:"known_speaker_names,omitempty"`      // Speaker names for diarization (up to 4)
	KnownSpeakerReferences []string          `json:"known_speaker_references,omitempty"` // Audio samples as data URLs (2-10 seconds each)
}

func (TranscriptionRequest) String

func (s TranscriptionRequest) String() string

type TranscriptionResponse

// TranscriptionResponse is the result returned by Client.Transcribe. Every
// field is optional in the JSON encoding (omitempty).
type TranscriptionResponse struct {
	Task     string                  `json:"task,omitempty"`
	Language string                  `json:"language,omitempty"`
	Duration schema.Timestamp        `json:"duration,omitempty"`
	Text     string                  `json:"text,omitempty"`
	// Segment holds the entries of the "segments" JSON array; note the field
	// name is singular while the JSON key is plural.
	Segment  []*TranscriptionSegment `json:"segments,omitempty" writer:",width:40,wrap"`
	// Usage is nil when the response carries no usage information.
	Usage    *TranscriptionUsage     `json:"usage,omitempty"`
}

func (*TranscriptionResponse) Segments

func (TranscriptionResponse) String

func (s TranscriptionResponse) String() string

func (*TranscriptionResponse) Unmarshal

func (s *TranscriptionResponse) Unmarshal(header http.Header, r io.Reader) error

type TranscriptionSegment

// TranscriptionSegment is one entry of TranscriptionResponse.Segment. The
// same struct is used for verbose_json and diarized_json responses, which is
// why Id is untyped and the format-specific fields are optional.
type TranscriptionSegment struct {
	Type             string           `json:"type,omitempty"` // Segment type (e.g., "transcript.text.segment" for diarized)
	Id               any              `json:"id"`             // Segment ID (int32 for verbose_json, string for diarized_json); see IdAsInt32
	Seek             uint32           `json:"seek,omitempty"`
	Start            schema.Timestamp `json:"start"` // Start of the segment
	End              schema.Timestamp `json:"end"`   // End of the segment
	Text             string           `json:"text"`  // Text content of the segment
	Speaker          string           `json:"speaker,omitempty"`           // Speaker label for diarized transcription
	Tokens           []uint32         `json:"tokens,omitempty"`            // Array of token IDs for the text content.
	Temperature      *float64         `json:"temperature,omitempty"`       // Temperature parameter used for generating the segment.
	AvgLogProb       *float64         `json:"avg_logprob,omitempty"`       // Average logprob of the segment. If the value is lower than -1, consider the logprobs failed.
	CompressionRatio *float64         `json:"compression_ratio,omitempty"` // Compression ratio of the segment. If the value is greater than 2.4, consider the compression failed.
	NoSpeechProb     *float64         `json:"no_speech_prob,omitempty"`    // Probability of no speech in the segment. If the value is higher than 1.0 and the avg_logprob is below -1, consider this segment silent.
}

func (*TranscriptionSegment) IdAsInt32 added in v0.0.39

func (seg *TranscriptionSegment) IdAsInt32() int32

IdAsInt32 returns the segment ID as int32, handling both numeric and string IDs

type TranscriptionUsage added in v0.0.39

// TranscriptionUsage describes the usage reported in a TranscriptionResponse
// (its Usage field).
type TranscriptionUsage struct {
	Type    string `json:"type"`    // "duration"
	Seconds int    `json:"seconds"` // Billed duration in seconds
}

type TranslationRequest

// TranslationRequest holds the request fields shared by translation and
// transcription; TranscriptionRequest embeds it. Pointer fields are optional
// and are omitted from the JSON encoding when nil.
type TranslationRequest struct {
	Model       string         `json:"model"` // Model name, e.g. whisper-1
	File        multipart.File `json:"file"`  // Input file; presumably the audio to process — confirm against caller
	Prompt      *string        `json:"prompt,omitempty"`
	Format      *string        `json:"response_format,omitempty"` // json, text, srt, verbose_json, or vtt
	Temperature *float64       `json:"temperature,omitempty"`     // 0.0 -> 1.0
}

func (TranslationRequest) String

func (s TranslationRequest) String() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL