Documentation
¶
Index ¶
Constants ¶
View Source
// Base URL of the OpenAI API and the audio paths that are appended to it.
const (
	Endpoint       = "https://api.openai.com/v1/"
	TranscribePath = "audio/transcriptions" // Endpoint for transcription
	TranslatePath  = "audio/translations"   // Endpoint for translation
)
View Source
// Response format names.
const (
	FormatJson         = "json"
	FormatVerboseJson  = "verbose_json"
	FormatDiarizedJson = "diarized_json"
	FormatText         = "text"
	FormatSrt          = "srt"
	FormatVtt          = "vtt"
)
View Source
// Chunking strategy type names.
const (
	ChunkingStrategyAuto      = "auto"       // Auto chunking with VAD
	ChunkingStrategyServerVAD = "server_vad" // Server-side VAD chunking (required for diarization)
)
Variables ¶
View Source
var ( // Supported models for transcription and translation Models = []string{ "whisper-1", "gpt-4o-mini-transcribe", "gpt-4o-mini-transcribe-2025-12-15", "gpt-4o-transcribe", } // Supported models for diarization DiarizeModels = []string{ "gpt-4o-transcribe-diarize", } // Supported response formats Formats = []string{ FormatText, FormatJson, FormatVerboseJson, FormatDiarizedJson, FormatSrt, FormatVtt, } )
Functions ¶
func LanguageCode ¶
LanguageCode returns the language and two-letter OpenAI language code for a given tuple, or an empty string if the language is not recognized.
Types ¶
type ChunkingStrategy ¶ added in v0.0.39
// ChunkingStrategy describes how audio is split into chunks. Only Type is
// always serialized; the pointer fields are omitted from JSON when nil.
type ChunkingStrategy struct {
	// "auto" or "server_vad"
	Type string `json:"type"`

	// VAD threshold (0.0-1.0)
	VadThreshold *float64 `json:"threshold,omitempty"`

	// Padding before speech (ms)
	PrefixPaddingMs *int `json:"prefix_padding_ms,omitempty"`

	// Silence duration to end segment (ms)
	SilenceDurationMs *int `json:"silence_duration_ms,omitempty"`
}
ChunkingStrategy controls how the audio is cut into chunks for diarization.
func (ChunkingStrategy) String ¶ added in v0.0.39
func (c ChunkingStrategy) String() string
type Client ¶
func (*Client) Transcribe ¶
func (c *Client) Transcribe(ctx context.Context, req TranscriptionRequest, streamfn func(schema.Event)) (*TranscriptionResponse, error)
Transcribe performs a transcription request in the language of the speech. If streamfn is provided, streaming mode is enabled and events will be passed to the callback.
func (*Client) Translate ¶
func (c *Client) Translate(ctx context.Context, req TranslationRequest) (*TranscriptionResponse, error)
Translate performs a transcription request and returns the result in English.
type TranscriptionRequest ¶
// TranscriptionRequest is the request passed to Client.Transcribe. It embeds
// TranslationRequest and adds the fields declared below.
type TranscriptionRequest struct {
	TranslationRequest

	// logprobs
	Include []string `json:"include,omitempty"`

	// Transcription only: en, es, fr, etc.
	Language *string `json:"language,omitempty"`

	// Combination of word, segment
	Timestamps []string `json:"timestamp_granularities,omitempty"`

	// "auto" or server_vad object
	ChunkingStrategy *ChunkingStrategy `json:"chunking_strategy,omitempty"`

	// Speaker names for diarization (up to 4)
	KnownSpeakerNames []string `json:"known_speaker_names,omitempty"`

	// Audio samples as data URLs (2-10 seconds each)
	KnownSpeakerReferences []string `json:"known_speaker_references,omitempty"`
}
func (TranscriptionRequest) String ¶
func (s TranscriptionRequest) String() string
type TranscriptionResponse ¶
// TranscriptionResponse is the result type returned by Client.Transcribe and
// Client.Translate. Every field is omitted from JSON when empty.
type TranscriptionResponse struct {
	Task string `json:"task,omitempty"`

	Language string `json:"language,omitempty"`

	Duration schema.Timestamp `json:"duration,omitempty"`

	Text string `json:"text,omitempty"`

	// Serialized under the JSON key "segments".
	Segment []*TranscriptionSegment `json:"segments,omitempty" writer:",width:40,wrap"`

	Usage *TranscriptionUsage `json:"usage,omitempty"`
}
func (*TranscriptionResponse) Segments ¶
func (s *TranscriptionResponse) Segments() *schema.Transcription
func (TranscriptionResponse) String ¶
func (s TranscriptionResponse) String() string
type TranscriptionSegment ¶
// TranscriptionSegment is one entry of TranscriptionResponse.Segment.
type TranscriptionSegment struct {
	// Segment type (e.g., "transcript.text.segment" for diarized)
	Type string `json:"type,omitempty"`

	// Segment ID (int32 for verbose_json, string for diarized_json)
	Id any `json:"id"`

	Seek uint32 `json:"seek,omitempty"`

	Start schema.Timestamp `json:"start"`

	End schema.Timestamp `json:"end"`

	Text string `json:"text"`

	// Speaker label for diarized transcription
	Speaker string `json:"speaker,omitempty"`

	// Array of token IDs for the text content.
	Tokens []uint32 `json:"tokens,omitempty"`

	// Temperature parameter used for generating the segment.
	Temperature *float64 `json:"temperature,omitempty"`

	// Average logprob of the segment. If the value is lower than -1,
	// consider the logprobs failed.
	AvgLogProb *float64 `json:"avg_logprob,omitempty"`

	// Compression ratio of the segment. If the value is greater than 2.4,
	// consider the compression failed.
	CompressionRatio *float64 `json:"compression_ratio,omitempty"`

	// Probability of no speech in the segment. If the value is higher than
	// 1.0 and the avg_logprob is below -1, consider this segment silent.
	NoSpeechProb *float64 `json:"no_speech_prob,omitempty"`
}
func (*TranscriptionSegment) IdAsInt32 ¶ added in v0.0.39
func (seg *TranscriptionSegment) IdAsInt32() int32
IdAsInt32 returns the segment ID as int32, handling both numeric and string IDs.
type TranscriptionUsage ¶ added in v0.0.39
type TranslationRequest ¶
// TranslationRequest is the request passed to Client.Translate. Model and
// File are always serialized; the pointer fields are omitted from JSON when
// nil.
type TranslationRequest struct {
	// whisper-1
	Model string `json:"model"`

	File multipart.File `json:"file"`

	Prompt *string `json:"prompt,omitempty"`

	// json, text, srt, verbose_json, or vtt
	Format *string `json:"response_format,omitempty"`

	// 0.0 -> 1.0
	Temperature *float64 `json:"temperature,omitempty"`
}
func (TranslationRequest) String ¶
func (s TranslationRequest) String() string
Click to show internal directories.
Click to hide internal directories.