Documentation
¶
Overview ¶
aether/errors.go
This file provides public accessors for Aether's structured error kinds. Internally, we use an internal/errors package to represent specific failure categories such as configuration issues or robots.txt violations. This wrapper re-exports those concepts in a stable way.
aether/fetch.go
This file exposes the Fetch method on Client, which performs a robots.txt-compliant HTTP GET using Aether's internal HTTP client.
Index ¶
- Constants
- Variables
- func DarkTheme() display.Theme
- func DefaultTheme() display.Theme
- func MinimalTheme() display.Theme
- func PaperTheme() display.Theme
- func Version() string
- type Article
- type BatchItemResult
- type BatchOptions
- type BatchResult
- type CacheOption
- type Client
- func (c *Client) Batch(ctx context.Context, urls []string, opts BatchOptions) (*BatchResult, error)
- func (c *Client) Crawl(ctx context.Context, startURL string, opts CrawlOptions) error
- func (c *Client) Detect(ctx context.Context, url string) (*DetectionResult, error)
- func (c *Client) EffectiveConfig() Config
- func (c *Client) ExtractArticle(ctx context.Context, url string) (*Article, error)
- func (c *Client) ExtractArticleFromHTML(html []byte, url string) (*Article, error)
- func (c *Client) Fetch(ctx context.Context, rawURL string, opts ...FetchOption) (*FetchResult, error)
- func (c *Client) FetchJSON(ctx context.Context, url string, dest any) error
- func (c *Client) FetchRSS(ctx context.Context, url string) (*Feed, error)
- func (c *Client) FetchRaw(ctx context.Context, url string) ([]byte, http.Header, error)
- func (c *Client) FetchText(ctx context.Context, url string) (string, http.Header, error)
- func (c *Client) FindDisplayPlugin(format string) (plugins.DisplayPlugin, bool)
- func (c *Client) GitHubReadme(ctx context.Context, owner, repo, ref string) (*GitHubReadme, error)
- func (c *Client) GovernmentPress(ctx context.Context, limit int) ([]GovernmentPressRelease, error)
- func (c *Client) HackerNewsTopStories(ctx context.Context, limit int) ([]HackerNewsStory, error)
- func (c *Client) ListDisplayFormats() []string
- func (c *Client) MarshalBTON(sr *SearchResult) ([]byte, error)
- func (c *Client) MarshalBTONFromModel(doc *model.Document) ([]byte, error)
- func (c *Client) MarshalSearchResultJSON(sr *SearchResult) ([]byte, error)
- func (c *Client) MarshalSearchResultTOON(sr *SearchResult) ([]byte, error)
- func (c *Client) MarshalTOON(sr *SearchResult) ([]byte, error)
- func (c *Client) MarshalTOONFromModel(doc *model.Document) ([]byte, error)
- func (c *Client) MarshalTOONLite(sr *SearchResult) ([]byte, error)
- func (c *Client) MarshalTOONLiteFromModel(doc *model.Document) ([]byte, error)
- func (c *Client) MarshalTOONLitePretty(sr *SearchResult) ([]byte, error)
- func (c *Client) MarshalTOONPretty(sr *SearchResult) ([]byte, error)
- func (c *Client) MarshalTOONPrettyFromModel(doc *model.Document) ([]byte, error)
- func (c *Client) NormalizeSearchResult(sr *SearchResult) *NormalizedDocument
- func (c *Client) ParseHTML(html []byte) (*ParsedHTML, error)
- func (c *Client) ParseRSS(xmlBytes []byte) (*Feed, error)
- func (c *Client) RegisterDisplayPlugin(p plugins.DisplayPlugin) error
- func (c *Client) RegisterSourcePlugin(p plugins.SourcePlugin) error
- func (c *Client) RegisterTransformPlugin(p plugins.TransformPlugin) error
- func (c *Client) Render(ctx context.Context, format string, doc *NormalizedDocument) ([]byte, error)
- func (c *Client) RenderMarkdown(doc *NormalizedDocument) string
- func (c *Client) RenderMarkdownWithTheme(doc *NormalizedDocument, theme display.Theme) string
- func (c *Client) RenderPreview(doc *NormalizedDocument) string
- func (c *Client) RenderPreviewWithTheme(doc *NormalizedDocument, theme display.Theme) string
- func (c *Client) RenderSearchResult(ctx context.Context, format string, sr *SearchResult) ([]byte, error)
- func (c *Client) RenderTable(header []string, rows [][]string) string
- func (c *Client) RenderTableWithTheme(header []string, rows [][]string, theme display.Theme) string
- func (c *Client) Search(ctx context.Context, query string) (*SearchResult, error)
- func (c *Client) SmartQuery(query string) *SmartQueryPlan
- func (c *Client) StreamFeedJSONL(ctx context.Context, w io.Writer, feed *Feed) error
- func (c *Client) StreamNormalizedJSONL(ctx context.Context, w io.Writer, doc *NormalizedDocument) error
- func (c *Client) StreamSearchResultJSONL(ctx context.Context, w io.Writer, sr *SearchResult) error
- func (c *Client) StreamSearchResultTOON(ctx context.Context, w io.Writer, sr *SearchResult) error
- func (c *Client) StreamTOON(ctx context.Context, w io.Writer, doc *NormalizedDocument) error
- func (c *Client) ToNormalized(sr *SearchResult) *model.Document
- func (c *Client) ToTOON(sr *SearchResult) *toon.Document
- func (c *Client) ToTOONFromModel(doc *model.Document) *toon.Document
- func (c *Client) UnmarshalBTON(b []byte) (*toon.Document, error)
- func (c *Client) WeatherAt(ctx context.Context, lat, lon float64, hours int) ([]Weather, error)
- func (c *Client) WhiteHouseRecentPosts(ctx context.Context, limit int) ([]WhiteHousePost, error)
- func (c *Client) WikidataLookup(ctx context.Context, name string) (*WikidataEntity, error)
- func (c *Client) WikipediaSummary(ctx context.Context, title string) (*WikiSummary, error)
- type Config
- type CrawlOptions
- type CrawlVisitor
- type CrawlVisitorFunc
- type CrawledPage
- type DetectionResult
- type Error
- type ErrorKind
- type Feed
- type FeedItem
- type FetchOption
- type FetchOptions
- type FetchResult
- type GitHubReadme
- type GovernmentPressRelease
- type HackerNewsStory
- type Heading
- type JSONLObject
- type Link
- type NormalizedDocument
- type Option
- type Paragraph
- type ParsedHTML
- type QueryIntent
- type SearchDocument
- type SearchDocumentKind
- type SearchIntent
- type SearchPlan
- type SearchResult
- type SmartQueryPlan
- type Weather
- type WhiteHousePost
- type WikiSummary
- type WikidataEntity
Constants ¶
const DefaultUserAgent = "AetherBot/1.0 (+https://github.com/Nibir1/Aether)"
DefaultUserAgent is the default HTTP User-Agent string that Aether uses when making outbound requests. It includes the repository for transparency.
Variables ¶
var ErrNilClient = errors.New("aether: nil client")
ErrNilClient is returned when methods are called on a nil *Client receiver. This prevents panics and provides a predictable error signal.
Functions ¶
func DefaultTheme ¶
func MinimalTheme ¶
func PaperTheme ¶
Types ¶
type Article ¶
type Article struct {
URL string
Title string
Byline string
Content string
HTML string
Excerpt string
Meta map[string]string
}
Article is the public article representation returned by Aether.
Content is the plain-text main content. HTML is a sanitized HTML fragment of the main content. Excerpt is a short summary derived from the article body. Meta contains document metadata extracted from <meta> tags.
type BatchItemResult ¶
BatchItemResult represents the outcome of a single fetch in the batch.
type BatchOptions ¶
type BatchOptions struct {
// Maximum concurrent workers. If <= 0, defaults to 4.
Concurrency int
// Optional additional headers applied to each fetch request.
Headers http.Header
}
BatchOptions configures the behavior of batch fetch operations.
type BatchResult ¶
type BatchResult struct {
Results []BatchItemResult
}
BatchResult contains results for all fetched URLs in the same order as the input slice.
type CacheOption ¶
CacheOption configures Aether caching behavior.
These options modify the internal *config.Config used by the Client. They must be applied when constructing the Aether Client via NewClient.
func WithFileCache ¶
func WithFileCache(dir string, ttl time.Duration) CacheOption
WithFileCache enables the persistent file-backed cache.
dir must be a writable directory for Aether to store its cached files. ttl defines how long entries remain valid.
func WithMemoryCache ¶
func WithMemoryCache(maxEntries int, ttl time.Duration) CacheOption
WithMemoryCache enables the in-memory LRU cache.
maxEntries sets the maximum number of items retained in memory. ttl sets the time-to-live for cached entries.
func WithRedisCache ¶
func WithRedisCache(addr string, ttl time.Duration) CacheOption
WithRedisCache enables Redis caching.
addr is the Redis server address (e.g., "localhost:6379"). ttl controls how long Redis entries remain valid.
type Client ¶
type Client struct {
// contains filtered or unexported fields
}
Client is the main public interface for using Aether.
The Client owns:
- unified composite cache
- robots.txt-compliant HTTP fetcher
- internal OpenAPI client
- public plugin registry
Future stages extend Client with:
- full Aether.Search federation
- Crawl, Batch
- Normalize (JSON+TOON)
- Display system (Markdown, tables, themes)
func NewClient ¶
NewClient constructs a new Aether Client with optional configuration.
Pipeline:
- Load default internal config
- Apply user-specified Option functions
- Ensure User-Agent is set
- Initialize logger
- Initialize unified composite cache
- Initialize HTTP fetcher (robots.txt + caching)
- Initialize internal OpenAPI client
- Initialize plugin registry
func (*Client) Batch ¶
func (c *Client) Batch(ctx context.Context, urls []string, opts BatchOptions) (*BatchResult, error)
Batch fetches a list of URLs using Aether’s internal robots.txt-compliant fetcher and returns structured results. The function preserves input ordering in the result set.
func (*Client) Crawl ¶
Crawl executes a legal, robots.txt-compliant crawl starting at startURL, using the provided options.
The crawl stops when:
- frontier is empty, or
- MaxPages is reached, or
- context is canceled, or
- Visitor returns an error.
func (*Client) Detect ¶
Detect runs a full fetch (robots.txt-compliant), sniffs content type, parses HTML only when needed, and returns a structured detection result.
func (*Client) EffectiveConfig ¶
EffectiveConfig returns the final public configuration in effect.
func (*Client) ExtractArticle ¶
ExtractArticle fetches the given URL (respecting robots.txt) and runs article extraction on the retrieved HTML.
This is a convenience wrapper around Fetch + ExtractArticleFromHTML.
func (*Client) ExtractArticleFromHTML ¶
ExtractArticleFromHTML extracts the main article content from raw HTML.
url is optional but recommended; it is stored in the Article result and may be used by future features (e.g. canonical URL resolution).
func (*Client) Fetch ¶
func (c *Client) Fetch(ctx context.Context, rawURL string, opts ...FetchOption) (*FetchResult, error)
Fetch performs a robots.txt-compliant HTTP GET for the given URL.
It automatically:
- respects robots.txt rules using the configured User-Agent,
- applies polite per-host concurrency limits,
- performs simple in-memory response caching,
- retries transient failures with exponential backoff.
The result is a FetchResult containing status, headers, body and timestamp of retrieval.
func (*Client) FetchJSON ¶
FetchJSON performs a robots.txt-compliant fetch and unmarshals the JSON response into the provided destination struct pointer.
Example:
var out MyStruct
if err := cli.FetchJSON(ctx, "https://example.com/api", &out); err != nil {
...
}
This helper is used internally by Aether OpenAPI integrations and is ideal for plugins interacting with public JSON APIs.
func (*Client) FetchRSS ¶
FetchRSS fetches and parses an RSS/Atom feed, respecting robots.txt.
This is the safe and preferred method for real-world usage.
Example:
feed, err := client.FetchRSS(ctx, "https://example.com/feed.rss")
func (*Client) FetchRaw ¶
FetchRaw performs a robots.txt-compliant HTTP GET and returns:
- raw response body bytes
- http.Header for metadata
- error if the fetch fails
The caller receives the untouched response body, suitable for:
- JSON decoding
- image/binary processing
- plugin-level custom parsing
Plugins should ALWAYS use this instead of performing HTTP manually.
func (*Client) FetchText ¶
FetchText performs a robots.txt-compliant fetch and returns the UTF-8 interpretation of the response body.
This helper is ideal for:
- Markdown files
- README files
- Plain text APIs
- HTML extraction by plugins
It returns:
- decoded string
- http.Header
- error
func (*Client) FindDisplayPlugin ¶
func (c *Client) FindDisplayPlugin(format string) (plugins.DisplayPlugin, bool)
FindDisplayPlugin returns a display plugin by format ("html", "pdf", "ansi", …).
func (*Client) GitHubReadme ¶
func (*Client) GovernmentPress ¶
func (*Client) HackerNewsTopStories ¶
func (*Client) ListDisplayFormats ¶
ListDisplayFormats lists all registered plugin-provided formats. Example output: []string{"html", "ansi", "pdf"}
func (*Client) MarshalBTON ¶
func (c *Client) MarshalBTON(sr *SearchResult) ([]byte, error)
MarshalBTON serializes a SearchResult into BT0N binary format.
func (*Client) MarshalBTONFromModel ¶
MarshalBTONFromModel serializes a model.Document into BT0N.
func (*Client) MarshalSearchResultJSON ¶
func (c *Client) MarshalSearchResultJSON(sr *SearchResult) ([]byte, error)
MarshalSearchResultJSON returns pretty-printed JSON for a SearchResult.
func (*Client) MarshalSearchResultTOON ¶
func (c *Client) MarshalSearchResultTOON(sr *SearchResult) ([]byte, error)
MarshalSearchResultTOON returns pretty-printed TOON JSON for a SearchResult.
func (*Client) MarshalTOON ¶
func (c *Client) MarshalTOON(sr *SearchResult) ([]byte, error)
MarshalTOON serializes a SearchResult into compact TOON JSON.
func (*Client) MarshalTOONFromModel ¶
MarshalTOONFromModel serializes a normalized Document into compact TOON JSON.
func (*Client) MarshalTOONLite ¶
func (c *Client) MarshalTOONLite(sr *SearchResult) ([]byte, error)
MarshalTOONLite serializes SearchResult → normalized → TOON → lite JSON.
func (*Client) MarshalTOONLiteFromModel ¶
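MarshalTOONLiteFromModel is the direct model.Document variant of MarshalTOONLite: it serializes a normalized model.Document into TOON Lite JSON.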
func (*Client) MarshalTOONLitePretty ¶
func (c *Client) MarshalTOONLitePretty(sr *SearchResult) ([]byte, error)
MarshalTOONLitePretty serializes a SearchResult into pretty-printed TOON Lite JSON.
func (*Client) MarshalTOONPretty ¶
func (c *Client) MarshalTOONPretty(sr *SearchResult) ([]byte, error)
MarshalTOONPretty serializes a SearchResult into pretty-printed TOON JSON.
func (*Client) MarshalTOONPrettyFromModel ¶
MarshalTOONPrettyFromModel serializes a normalized Document into pretty-printed TOON JSON.
func (*Client) NormalizeSearchResult ¶
func (c *Client) NormalizeSearchResult(sr *SearchResult) *NormalizedDocument
NormalizeSearchResult converts a public SearchResult into a canonical normalized Document and applies TransformPlugins (if any).
func (*Client) ParseHTML ¶
func (c *Client) ParseHTML(html []byte) (*ParsedHTML, error)
ParseHTML parses raw HTML bytes into a ParsedHTML structure.
This method does not perform any network operations and does not consult robots.txt. It is intended for HTML you already have, such as the body of a FetchResult.
func (*Client) ParseRSS ¶
ParseRSS parses raw RSS/Atom XML bytes into a public Feed.
This method does NOT fetch or check robots.txt; it only parses. Use FetchRSS() to fetch and parse in one call.
func (*Client) RegisterDisplayPlugin ¶
func (c *Client) RegisterDisplayPlugin(p plugins.DisplayPlugin) error
RegisterDisplayPlugin registers a display plugin.
Display plugins output formats other than Markdown (e.g., HTML, ANSI, PDF). Later stages will integrate these into Aether’s rendering subsystem.
func (*Client) RegisterSourcePlugin ¶
func (c *Client) RegisterSourcePlugin(p plugins.SourcePlugin) error
RegisterSourcePlugin registers a public SourcePlugin with the Client.
Usage:
cli := aether.NewClient(...)
err := cli.RegisterSourcePlugin(myHNPlugin)
Strict naming rule:
If another plugin with the same name already exists, an error is returned.
func (*Client) RegisterTransformPlugin ¶
func (c *Client) RegisterTransformPlugin(p plugins.TransformPlugin) error
RegisterTransformPlugin registers a transform plugin.
Transform plugins take a normalized Document and return a modified/enriched version. They run after built-in normalization in the Search pipeline.
func (*Client) Render ¶
func (c *Client) Render(ctx context.Context, format string, doc *NormalizedDocument) ([]byte, error)
Render renders a normalized document using either a built-in format or a DisplayPlugin. Strict mode (Option B):
Built-in formats:
- "markdown", "md"
- "preview"
- "text" (alias of markdown)
All other formats MUST come from DisplayPlugins. If no plugin is registered for the requested format, Render returns an error.
func (*Client) RenderMarkdown ¶
func (c *Client) RenderMarkdown(doc *NormalizedDocument) string
RenderMarkdown renders a normalized document with the default theme.
func (*Client) RenderMarkdownWithTheme ¶
func (c *Client) RenderMarkdownWithTheme(doc *NormalizedDocument, theme display.Theme) string
RenderMarkdownWithTheme renders a normalized document with a custom theme.
func (*Client) RenderPreview ¶
func (c *Client) RenderPreview(doc *NormalizedDocument) string
func (*Client) RenderPreviewWithTheme ¶
func (c *Client) RenderPreviewWithTheme(doc *NormalizedDocument, theme display.Theme) string
func (*Client) RenderSearchResult ¶
func (c *Client) RenderSearchResult(ctx context.Context, format string, sr *SearchResult) ([]byte, error)
RenderSearchResult normalizes a SearchResult and passes it to Render().
func (*Client) RenderTableWithTheme ¶
func (*Client) Search ¶
Search runs Aether's high-level search pipeline for the given query.
Behavior:
- URL queries → robots.txt-compliant fetch → SearchDocument
- Non-URL queries:
- Try SourcePlugins (in registration order)
- Fallback to Wikipedia summary via internal OpenAPI client
TODO (future stages): expand SmartQuery classification, integrate RSS auto-detection, transform plugins, and richer federation.
func (*Client) SmartQuery ¶
func (c *Client) SmartQuery(query string) *SmartQueryPlan
SmartQuery analyzes a natural-language query and returns a routing plan.
This is a pure function of the query string: it does not hit the network, does not fetch content, and does not depend on remote state. It is safe to call frequently and suitable for unit testing.
func (*Client) StreamFeedJSONL ¶
StreamFeedJSONL streams each FeedItem as a JSONL object.
func (*Client) StreamNormalizedJSONL ¶
func (c *Client) StreamNormalizedJSONL(ctx context.Context, w io.Writer, doc *NormalizedDocument) error
StreamNormalizedJSONL streams a normalized Document as JSONL.
func (*Client) StreamSearchResultJSONL ¶
StreamSearchResultJSONL normalizes a SearchResult and streams JSONL.
func (*Client) StreamSearchResultTOON ¶
StreamSearchResultTOON normalizes a SearchResult and streams it as TOON JSONL events using the same event schema as StreamTOON.
func (*Client) StreamTOON ¶
StreamTOON streams a normalized document as a TOON event sequence in JSONL.
- ctx controls cancellation
- w is the destination writer (stdout, file, socket, etc.)
- doc is a normalized Aether document (model.Document / NormalizedDocument)
The function converts the document to TOON 2.0 and then emits:
doc_start → doc_meta? → token* → doc_end
func (*Client) ToNormalized ¶
func (c *Client) ToNormalized(sr *SearchResult) *model.Document
ToNormalized converts a SearchResult to a normalized model.Document.
func (*Client) ToTOON ¶
func (c *Client) ToTOON(sr *SearchResult) *toon.Document
ToTOON converts a public SearchResult into a TOON 2.0 Document.
Steps:
- NormalizeSearchResult → model.Document
- toon.FromModel → toon.Document
func (*Client) ToTOONFromModel ¶
ToTOONFromModel converts an internal normalized document directly to TOON. Useful for embedding Aether as a library when skipping SearchResult pipeline.
func (*Client) UnmarshalBTON ¶
UnmarshalBTON parses BT0N back into a TOON Document.
func (*Client) WhiteHouseRecentPosts ¶
func (*Client) WikidataLookup ¶
func (*Client) WikipediaSummary ¶
WikipediaSummary returns a concise summary for a given topic title using the Wikipedia REST API.
type Config ¶
type Config struct {
// Networking
UserAgent string
RequestTimeout time.Duration
MaxConcurrentHosts int
MaxRequestsPerHost int
// Logging
EnableDebugLogging bool
// Caching
EnableMemoryCache bool
EnableFileCache bool
EnableRedisCache bool
CacheDirectory string
RedisAddress string
CacheTTL time.Duration
MaxCacheEntries int
}
Config is the public, inspectable view of effective Aether configuration.
type CrawlOptions ¶
type CrawlOptions struct {
// Maximum link depth to traverse (0 = only the start page).
// If MaxDepth < 0, depth is unlimited.
MaxDepth int
// Maximum number of pages to fetch. If <= 0, unlimited.
MaxPages int
// Restrict crawling to the same host as the start URL.
SameHostOnly bool
// Additional domain allowlist; empty means "no restrictions".
AllowedDomains []string
// Optional domain blocklist.
DisallowedDomains []string
// Minimum delay between requests to the same host.
FetchDelay time.Duration
// Future-proof concurrency parameter.
Concurrency int
// Callback invoked for each visited page.
Visitor CrawlVisitor
}
CrawlOptions configures the behavior of Aether's public crawl API.
type CrawlVisitor ¶
type CrawlVisitor interface {
VisitCrawledPage(ctx context.Context, page *CrawledPage) error
}
CrawlVisitor defines the callback interface for receiving crawled pages.
type CrawlVisitorFunc ¶
type CrawlVisitorFunc func(ctx context.Context, page *CrawledPage) error
CrawlVisitorFunc is an adapter that allows an ordinary function to be used as a CrawlVisitor.
func (CrawlVisitorFunc) VisitCrawledPage ¶
func (f CrawlVisitorFunc) VisitCrawledPage(ctx context.Context, page *CrawledPage) error
VisitCrawledPage calls f(ctx, page).
type CrawledPage ¶
type CrawledPage struct {
URL string
Depth int
StatusCode int
Content string
Links []string
Metadata map[string]string
}
CrawledPage is the public representation of a fetched page during crawling. This is a thin wrapper around internal/crawl.Page.
type DetectionResult ¶
type DetectionResult struct {
URL string
RawType string
SubType string
MIME string
Charset string
Encoding string
IsBinary bool
Metadata map[string]string
Title string
Canonical string
}
DetectionResult is the public type returned to callers.
type Error ¶
Error is Aether's structured error type, re-exported for public use.
Callers may use errors.Is / errors.As with this type, or inspect the Kind field directly to react to specific failure categories.
type ErrorKind ¶
ErrorKind is a high-level category of Aether error.
It is intentionally string-based so it can be logged, compared and inspected easily without depending on internal implementation details.
const (
	ErrorKindUnknown ErrorKind = internal.KindUnknown
	ErrorKindConfig  ErrorKind = internal.KindConfig
	ErrorKindHTTP    ErrorKind = internal.KindHTTP
	ErrorKindRobots  ErrorKind = internal.KindRobots
	ErrorKindParsing ErrorKind = internal.KindParsing
)
Public error kind constants that mirror the internal error kinds. These allow callers to distinguish between failure modes such as configuration errors, HTTP errors or robots.txt violations.
type FeedItem ¶
type FeedItem struct {
Title string
Link string
Description string
Content string
Author string
Published int64
Updated int64
GUID string
}
FeedItem is the public representation of a feed entry.
type FetchOption ¶
type FetchOption func(*FetchOptions)
FetchOption configures FetchOptions.
func WithHeader ¶
func WithHeader(key, value string) FetchOption
WithHeader adds or overrides a single HTTP header for the Fetch call.
Multiple WithHeader options can be combined; later calls override earlier ones for the same header key.
type FetchOptions ¶
FetchOptions describes optional parameters for Fetch.
type FetchResult ¶
type FetchResult struct {
URL string
StatusCode int
Header http.Header
Body []byte
FetchedAt time.Time
}
FetchResult is the public view of a completed HTTP fetch operation.
It intentionally exposes only safe, immutable data. Callers should treat the Body slice as read-only.
type GitHubReadme ¶
GitHubReadme is the public GitHub README representation.
type GovernmentPressRelease ¶
type GovernmentPressRelease struct {
Title string
URL string
Source string
Snippet string
DateUnix int64
}
GovernmentPressRelease is the public normalized government press item.
type HackerNewsStory ¶
type HackerNewsStory struct {
ID int64
Title string
URL string
Author string
Score int
TimeUnix int64
CommentCount int
}
HackerNewsStory is the public Hacker News story representation.
type JSONLObject ¶
JSONLObject is the container for each JSONL line. Every streamed line uses this structure.
type Option ¶
Option is a functional option that modifies the internal configuration.
func WithConcurrency ¶
WithConcurrency sets concurrency caps for outbound HTTP requests.
func WithDebugLogging ¶
WithDebugLogging enables verbose internal logs.
func WithRequestTimeout ¶
WithRequestTimeout sets the HTTP timeout duration.
func WithUserAgent ¶
WithUserAgent overrides the default HTTP User-Agent.
type Paragraph ¶
type Paragraph struct {
Text string
}
Paragraph represents paragraph text in the document.
type ParsedHTML ¶
type ParsedHTML struct {
Title string
Headings []Heading
Paragraphs []Paragraph
Links []Link
Meta map[string]string
}
ParsedHTML represents a normalized view of an HTML document.
It is designed to be LLM-friendly and stable as a public API.
type QueryIntent ¶
type QueryIntent string
QueryIntent mirrors the internal smartquery.Intent type.
const (
	QueryIntentUnknown       QueryIntent = "unknown"
	QueryIntentGeneralSearch QueryIntent = "general_search"
	QueryIntentLookup        QueryIntent = "lookup"
	QueryIntentNews          QueryIntent = "news"
	QueryIntentDocs          QueryIntent = "docs"
	QueryIntentCodeHelp      QueryIntent = "code_help"
	QueryIntentRSS           QueryIntent = "rss"
	QueryIntentHackerNews    QueryIntent = "hackernews"
	QueryIntentGitHub        QueryIntent = "github"
)
type SearchDocument ¶
type SearchDocument struct {
URL string // canonical URL, if any
Kind SearchDocumentKind // article, html_page, feed, json, text, binary
Title string // human-readable title
Excerpt string // short summary or snippet
Content string // main textual body
Metadata map[string]string // flat key/value metadata
}
SearchDocument is the primary, high-level document for a search.
It intentionally mirrors Aether's normalized model at a higher level, and is later converted into model.Document by NormalizeSearchResult.
type SearchDocumentKind ¶
type SearchDocumentKind string
SearchDocumentKind describes the kind of the primary document.
const (
	SearchDocumentKindUnknown SearchDocumentKind = "unknown"
	SearchDocumentKindArticle SearchDocumentKind = "article"
	SearchDocumentKindHTML    SearchDocumentKind = "html_page"
	SearchDocumentKindFeed    SearchDocumentKind = "feed"
	SearchDocumentKindJSON    SearchDocumentKind = "json"
	SearchDocumentKindText    SearchDocumentKind = "text"
	SearchDocumentKindBinary  SearchDocumentKind = "binary"
)
type SearchIntent ¶
type SearchIntent string
SearchIntent describes the broad category of a query.
const (
	SearchIntentUnknown SearchIntent = "unknown"
	SearchIntentURL     SearchIntent = "url"
	SearchIntentLookup  SearchIntent = "lookup"
	SearchIntentPlugin  SearchIntent = "plugin"
)
type SearchPlan ¶
type SearchPlan struct {
RawQuery string // original user query
Intent SearchIntent // URL vs lookup vs plugin-driven
URL string // populated for URL-based queries
Source string // plugin or integration that provided primary result
}
SearchPlan describes how Aether decided to handle a query.
type SearchResult ¶
type SearchResult struct {
Query string
Plan SearchPlan
PrimaryDocument *SearchDocument
// Optional views that may be populated by other subsystems:
Article *Article // from ExtractText / readability engine (Stage 5)
Feed *Feed // from RSS subsystem (Stage 8)
}
SearchResult is the main output of Aether.Search.
It contains:
- the original query
- the plan describing what was done
- the primary document (SearchDocument)
- optional richer views (Article, Feed) populated by other subsystems
type SmartQueryPlan ¶
type SmartQueryPlan struct {
Query string
Intent QueryIntent
IsQuestion bool
HasURL bool
PrimarySources []string
FallbackSources []string
UseLookup bool
UseSearchIndex bool
UseOpenAPIs bool
UseFeeds bool
UsePlugins bool
}
SmartQueryPlan is the public routing plan returned by Aether.
It describes how Aether *would* answer the query, without actually performing the network calls.
type Weather ¶
type Weather struct {
TimeUnix int64
Temperature float64
Humidity float64
WindSpeed float64
Summary string
}
Weather represents a normalized hourly weather entry.
type WhiteHousePost ¶
WhiteHousePost is the public White House post representation.