Documentation
¶
Overview ¶
Package ctxmgr provides automatic context window management for Celeste CLI. The package name is ctxmgr (not context) to avoid collision with the stdlib context package. Import path: github.com/whykusanagi/celeste-cli/cmd/celeste/context
Index ¶
- Constants
- Variables
- func CapToolResult(result string, maxBytes int, sessionID, toolCallID, baseDir string) (capped string, wasCapped bool, err error)
- func CompactSnip(text string, maxBytes int) string
- func EstimateSummarySavings(messages []ChatMessage, count int) (before, after int)
- func EstimateTokens(text string) int
- func FormatCompactionResult(r CompactionResult) string
- func FormatTokenCount(tokens int) string
- func GetModelLimit(model string) int
- func GetModelLimitWithOverride(model string, configOverride int) int
- func ToolResultsBaseDir() (string, error)
- type ChatMessage
- type CompactionEngine
- func NewCompactionEngine(summarizer Summarizer) *CompactionEngine
- func (ce *CompactionEngine) CompactProactive(ctx context.Context, messages []ChatMessage, budget *TokenBudget) ([]ChatMessage, CompactionResult, error)
- func (ce *CompactionEngine) CompactReactive(ctx context.Context, messages []ChatMessage, budget *TokenBudget) ([]ChatMessage, CompactionResult, error)
- type CompactionResult
- type LLMClient
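- type LLMSummarizer
- func NewLLMSummarizer(client LLMClient) *LLMSummarizer
- func (s *LLMSummarizer) Summarize(ctx context.Context, messages []ChatMessage) (string, error)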
- type Summarizer
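- type TokenBudget
- func NewTokenBudget(modelLimit, systemPromptTokens, toolDefTokens int) *TokenBudget
- func NewTokenBudgetForModel(model string, systemPromptTokens, toolDefTokens int) *TokenBudget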
- func (tb *TokenBudget) AddTurn(promptTokens, completionTokens int)
- func (tb *TokenBudget) Available() int
- func (tb *TokenBudget) GetUsagePercent() float64
- func (tb *TokenBudget) GetWarningLevel() string
- func (tb *TokenBudget) IncrementCompactCount()
- func (tb *TokenBudget) SetHistoryTokens(tokens int)
- func (tb *TokenBudget) ShouldCompactProactive(interval int) bool
- func (tb *TokenBudget) ShouldCompactReactive() bool
- func (tb *TokenBudget) Summary() string
- func (tb *TokenBudget) TotalUsed() int
Constants ¶
const (
	// DefaultMaxToolResultBytes is the maximum size in bytes for a single tool
	// result before it gets capped and spilled to disk. 128 KiB.
	DefaultMaxToolResultBytes = 128 * 1024
)
Variables ¶
var ModelLimits = map[string]int{
"gpt-4.1": 1050000,
"gpt-4.1-mini": 400000,
"gpt-4.1-nano": 400000,
"gpt-5.3-codex": 1050000,
"gpt-5.4": 1050000,
"gpt-5.4-mini": 400000,
"gpt-5.4-nano": 400000,
"gpt-5.4-pro": 1050000,
"o3": 1050000,
"o4-mini": 400000,
"claude-opus-4-6": 1000000,
"claude-sonnet-4-6": 1000000,
"claude-haiku-4-5": 200000,
"grok-4-1-fast": 2000000,
"grok-4-1-fast-reasoning": 2000000,
"grok-4-1-fast-non-reasoning": 2000000,
"grok-4.20-0309-reasoning": 2000000,
"grok-4.20-0309-non-reasoning": 2000000,
"grok-4.20-multi-agent-0309": 2000000,
"grok-code-fast-1": 2000000,
"venice-uncensored": 32000,
"venice-uncensored-role-play": 128000,
"deepseek-v3.2": 160000,
"qwen3-coder-480b-a35b-instruct": 256000,
"qwen3-coder-480b-a35b-instruct-turbo": 256000,
"qwen3-235b-a22b-thinking-2507": 128000,
"kimi-k2-5": 256000,
"zai-org-glm-4.7": 198000,
"mistral-small-3-2-24b-instruct": 256000,
"llama-3.3-70b": 128000,
"minimax-m25": 198000,
"default": 8192,
}
ModelLimits maps model names to their context window sizes in tokens. Migrated from config/tokens.go.
Functions ¶
func CapToolResult ¶
func CapToolResult(result string, maxBytes int, sessionID, toolCallID, baseDir string) (capped string, wasCapped bool, err error)
CapToolResult checks whether a tool result exceeds maxBytes. If it does, the full result is written to disk at:
{baseDir}/{sessionID}/{toolCallID}.txt
and a truncated preview is returned containing the first portion, a notice with the file path, and the last previewTailBytes of the result.
If baseDir is empty, ToolResultsBaseDir() is used.
Returns:
- capped: the (possibly truncated) result string to send to the model
- wasCapped: true if the result was truncated
- err: any I/O error from writing the spill file
func CompactSnip ¶
func CompactSnip(text string, maxBytes int) string
CompactSnip performs inline truncation of a single string without writing to disk. This is a lightweight alternative to CapToolResult when you do not need persistent storage of the full result (e.g., for intermediate processing steps).
func EstimateSummarySavings ¶
func EstimateSummarySavings(messages []ChatMessage, count int) (before, after int)
EstimateSummarySavings estimates tokens before and after summarization for the given message count. Useful for previewing compaction impact without actually calling the LLM.
func EstimateTokens ¶
func EstimateTokens(text string) int
EstimateTokens approximates token count from text length. Uses the rough heuristic of 4 characters per token.
func FormatCompactionResult ¶
func FormatCompactionResult(r CompactionResult) string
FormatCompactionResult creates a user-friendly summary of what compaction did.
func FormatTokenCount ¶
func FormatTokenCount(tokens int) string
FormatTokenCount formats a token count with K/M suffix for display.
func GetModelLimit ¶
func GetModelLimit(model string) int
GetModelLimit returns the token limit for a model name. Falls back to the "default" entry if the model is not found.
func GetModelLimitWithOverride ¶
func GetModelLimitWithOverride(model string, configOverride int) int
GetModelLimitWithOverride returns the token limit for a model, using the config override if it is positive.
func ToolResultsBaseDir ¶
func ToolResultsBaseDir() (string, error)
ToolResultsBaseDir returns the base directory for spilled tool results. Default: ~/.celeste/tool-results
Types ¶
type ChatMessage ¶
type ChatMessage struct {
Role string
Content string
ToolCallID string
Name string
Timestamp time.Time
}
ChatMessage mirrors tui.ChatMessage to avoid a circular import. The TUI layer converts between the two at integration boundaries.
type CompactionEngine ¶
type CompactionEngine struct {
// RecentTurnsToKeep controls how many recent user-assistant turn pairs
// are protected from compaction. Default: 4.
RecentTurnsToKeep int
// contains filtered or unexported fields
}
CompactionEngine orchestrates conversation compaction using a Summarizer.
func NewCompactionEngine ¶
func NewCompactionEngine(summarizer Summarizer) *CompactionEngine
NewCompactionEngine creates a CompactionEngine with the given summarizer.
func (*CompactionEngine) CompactProactive ¶
func (ce *CompactionEngine) CompactProactive(ctx context.Context, messages []ChatMessage, budget *TokenBudget) ([]ChatMessage, CompactionResult, error)
CompactProactive performs proactive compaction. The logic is identical to reactive compaction but is triggered on a turn-count interval rather than a usage threshold. The caller is responsible for checking budget.ShouldCompactProactive(interval) before calling this.
func (*CompactionEngine) CompactReactive ¶
func (ce *CompactionEngine) CompactReactive(ctx context.Context, messages []ChatMessage, budget *TokenBudget) ([]ChatMessage, CompactionResult, error)
CompactReactive performs reactive compaction when the context budget crosses the 80% threshold. It summarizes the oldest messages while keeping the most recent RecentTurnsToKeep turn pairs intact.
The messages slice should be the full conversation history (excluding system prompt, which is sent separately). The budget is used to track compaction count and recalculate usage after compaction.
type CompactionResult ¶
type CompactionResult struct {
MessagesBefore int
MessagesAfter int
TokensBefore int // Estimated tokens in compacted messages
TokensAfter int // Estimated tokens in the summary replacement
TurnsCompacted int // Number of user-assistant turn pairs summarized
}
CompactionResult describes what a compaction operation did.
type LLMClient ¶
type LLMClient interface {
// SendSummarizationRequest sends a system+user prompt pair to the LLM
// and returns the text response. No tool calling, no streaming.
SendSummarizationRequest(ctx context.Context, systemPrompt, userPrompt string) (string, error)
}
LLMClient is the minimal interface required to call an LLM for summarization. Depending on this interface instead of importing the llm package directly prevents circular dependencies. The concrete LLM client (created in main.go or wherever the client is constructed) should satisfy this interface.
type LLMSummarizer ¶
type LLMSummarizer struct {
// contains filtered or unexported fields
}
LLMSummarizer implements Summarizer by calling an LLM to generate summaries. Migrated from llm/summarize.go.
func NewLLMSummarizer ¶
func NewLLMSummarizer(client LLMClient) *LLMSummarizer
NewLLMSummarizer creates a new LLM-backed summarizer.
func (*LLMSummarizer) Summarize ¶
func (s *LLMSummarizer) Summarize(ctx context.Context, messages []ChatMessage) (string, error)
Summarize creates a summary of the given messages by sending them to the LLM.
type Summarizer ¶
type Summarizer interface {
// Summarize takes a sequence of messages and returns a text summary
// that preserves key context, decisions, and technical details.
Summarize(ctx context.Context, messages []ChatMessage) (string, error)
}
Summarizer creates concise summaries of conversation message sequences. This is the core interface used by CompactionEngine.
type TokenBudget ¶
type TokenBudget struct {
// Capacity
ModelLimit int // Total context window for the model
// Fixed allocations (set once at session start, updated rarely)
SystemPromptTokens int // Tokens consumed by system prompt
ToolDefinitionTokens int // Tokens consumed by tool/function schemas
// Dynamic tracking
HistoryTokens int // Tokens consumed by conversation history (all messages)
LastPromptTokens int // Prompt tokens from last API response
LastCompTokens int // Completion tokens from last API response
// Counters
TurnCount int // Number of user-assistant turn pairs completed
CompactCount int // Number of times compaction has been triggered
// contains filtered or unexported fields
}
TokenBudget tracks token usage across all components of a conversation. It provides fine-grained tracking beyond a simple current/max counter, separating system prompt, tool definitions, conversation history, and per-turn usage to enable intelligent compaction decisions.
func NewTokenBudget ¶
func NewTokenBudget(modelLimit, systemPromptTokens, toolDefTokens int) *TokenBudget
NewTokenBudget creates a TokenBudget for the given model. systemPromptTokens and toolDefTokens represent the fixed token overhead that is sent with every request.
func NewTokenBudgetForModel ¶
func NewTokenBudgetForModel(model string, systemPromptTokens, toolDefTokens int) *TokenBudget
NewTokenBudgetForModel creates a TokenBudget by looking up the model name in ModelLimits. If the model is not found, the "default" limit is used.
func (*TokenBudget) AddTurn ¶
func (tb *TokenBudget) AddTurn(promptTokens, completionTokens int)
AddTurn records token usage from an API response and increments the turn counter. promptTokens and completionTokens come from the API's usage field. If the API does not return usage, callers should pass estimates.
func (*TokenBudget) Available ¶
func (tb *TokenBudget) Available() int
Available returns tokens remaining before hitting the model limit. This is the space available for the next prompt + completion.
func (*TokenBudget) GetUsagePercent ¶
func (tb *TokenBudget) GetUsagePercent() float64
Despite its name, GetUsagePercent returns usage as a fraction from 0.0 to 1.0, not a percentage from 0 to 100.
func (*TokenBudget) GetWarningLevel ¶
func (tb *TokenBudget) GetWarningLevel() string
GetWarningLevel returns a severity string based on current usage. Returns "ok", "warn" (75%), "caution" (85%), or "critical" (95%).
func (*TokenBudget) IncrementCompactCount ¶
func (tb *TokenBudget) IncrementCompactCount()
IncrementCompactCount records that a compaction occurred.
func (*TokenBudget) SetHistoryTokens ¶
func (tb *TokenBudget) SetHistoryTokens(tokens int)
SetHistoryTokens directly sets the history token count. Use this when recalculating after compaction or when API usage data is unavailable.
func (*TokenBudget) ShouldCompactProactive ¶
func (tb *TokenBudget) ShouldCompactProactive(interval int) bool
ShouldCompactProactive returns true when the turn count has reached a multiple of the given interval AND usage is above 50%. This catches slow-growing sessions before they hit the reactive threshold.
func (*TokenBudget) ShouldCompactReactive ¶
func (tb *TokenBudget) ShouldCompactReactive() bool
ShouldCompactReactive returns true when usage has crossed the reactive compaction threshold (80% of model limit). This is checked after every API response.
func (*TokenBudget) Summary ¶
func (tb *TokenBudget) Summary() string
Summary returns a human-readable one-line summary of the budget state.
func (*TokenBudget) TotalUsed ¶
func (tb *TokenBudget) TotalUsed() int
TotalUsed returns the total tokens currently consumed across all components.