web

package

Version: v0.1.2 (latest) | Published: Jul 24, 2025 | License: MIT | Imports: 5 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/anboat/strato-sdk

Links

Open Source Insights

Documentation ¶

Index ¶

func RegisterAdapterCreator(scraperName string, creator AdapterCreatorFunc)
func RegisterAllWebAdapters() error
func RegisterWebAdapter(engine WebScraper, factory AdapterFactory)
type AdapterCreatorFunc
type AdapterFactory
type DefaultWebStrategy
- func NewDefaultWebStrategy(config *WebStrategyConfig) *DefaultWebStrategy
- func (s *DefaultWebStrategy) Execute(ctx context.Context, url string, options *ScrapeOptions) (*WebContent, error)
- func (s *DefaultWebStrategy) ExecuteMultiple(ctx context.Context, urls []string, options *ScrapeOptions) ([]*WebContent, error)
type Format
- func (f Format) IsValid() bool
- func (f Format) String() string
type Image
type Link
type ScrapeOptions
type WebAdapter
- func CreateWebAdapter(engine WebScraper) (WebAdapter, error)
type WebContent
type WebRegistry
- func NewWebRegistry() *WebRegistry
- func (r *WebRegistry) Create(scraper WebScraper) (WebAdapter, error)
- func (r *WebRegistry) Register(scraper WebScraper, factory AdapterFactory)
type WebScraper
- func (e WebScraper) String() string
type WebStrategy
- func NewDefaultWebStrategyFromConfig() (WebStrategy, error)
type WebStrategyConfig

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func RegisterAdapterCreator ¶

func RegisterAdapterCreator(scraperName string, creator AdapterCreatorFunc)

RegisterAdapterCreator registers an adapter creator function for a given scraper name.

func RegisterAllWebAdapters ¶

func RegisterAllWebAdapters() error

RegisterAllWebAdapters registers all web adapters based on the application configuration.

func RegisterWebAdapter ¶

func RegisterWebAdapter(engine WebScraper, factory AdapterFactory)

RegisterWebAdapter registers an adapter factory for the given scraping engine in the global registry.

Types ¶

type AdapterCreatorFunc ¶

type AdapterCreatorFunc func(scraperConfig types.WebScraperConfig) (WebAdapter, error)

AdapterCreatorFunc defines the function signature for an adapter creator.

type AdapterFactory ¶

type AdapterFactory func() (WebAdapter, error)

AdapterFactory defines the function signature for a web adapter factory.

type DefaultWebStrategy ¶

type DefaultWebStrategy struct {
	// contains filtered or unexported fields
}

DefaultWebStrategy is the default implementation of the web scraping strategy.

func NewDefaultWebStrategy ¶

func NewDefaultWebStrategy(config *WebStrategyConfig) *DefaultWebStrategy

NewDefaultWebStrategy creates a new default web scraping strategy.

func (*DefaultWebStrategy) Execute ¶

func (s *DefaultWebStrategy) Execute(ctx context.Context, url string, options *ScrapeOptions) (*WebContent, error)

Execute executes the web scraping strategy for a single URL.

func (*DefaultWebStrategy) ExecuteMultiple ¶

func (s *DefaultWebStrategy) ExecuteMultiple(ctx context.Context, urls []string, options *ScrapeOptions) ([]*WebContent, error)

ExecuteMultiple executes the web scraping strategy for multiple URLs.

type Format ¶

type Format string

Format is a string-based enumeration of scraping formats; the defined values are FormatText, FormatHTML, and FormatMarkdown.

const (
	FormatText     Format = "text"
	FormatHTML     Format = "html"
	FormatMarkdown Format = "markdown"
)

func (Format) IsValid ¶

func (f Format) IsValid() bool

IsValid checks if the format is valid.

func (Format) String ¶

func (f Format) String() string

String implements the fmt.Stringer interface.

type Image ¶

type Image struct {
	URL   string `json:"url"`
	Title string `json:"title,omitempty"`
}

Image represents an image.

type Link ¶

type Link struct {
	URL  string `json:"url"`
	Text string `json:"text"`
}

Link represents a hyperlink.

type ScrapeOptions ¶

type ScrapeOptions struct {
	// Basic options
	Format        Format            `json:"format,omitempty"`         // Scraping format enumeration
	Timeout       int               `json:"timeout,omitempty"`        // Timeout in seconds
	Cookies       map[string]string `json:"cookies,omitempty"`        // Cookies to use for the request
	ImagesSummary bool              `json:"images_summary,omitempty"` // Whether to include a summary of unique images at the end
	LinksSummary  bool              `json:"links_summary,omitempty"`  // Whether to include a summary of unique links at the end
}

ScrapeOptions holds the options for a scraping request.

type WebAdapter ¶

type WebAdapter interface {
	// Scrape is the core function for scraping a single page.
	Scrape(ctx context.Context, url string, options *ScrapeOptions) (*WebContent, error)

	// ScrapeMultiple scrapes multiple pages concurrently.
	ScrapeMultiple(ctx context.Context, urls []string, options *ScrapeOptions) ([]*WebContent, error)
}

WebAdapter is the interface implemented by web scraping adapters: Scrape handles a single URL and ScrapeMultiple handles a list of URLs.

func CreateWebAdapter ¶

func CreateWebAdapter(engine WebScraper) (WebAdapter, error)

CreateWebAdapter creates a web adapter instance from the global registry.

type WebContent ¶

type WebContent struct {
	// Basic information
	URL     string `json:"url"`
	Title   string `json:"title"`
	Content string `json:"content"` // Main content

	// Extracted structured data
	Links  []Link  `json:"links,omitempty"`
	Images []Image `json:"images,omitempty"`
}

WebContent is a simplified representation of a scraped web page: its URL, title, and main content, plus any extracted links and images.

type WebRegistry ¶

type WebRegistry struct {
	// contains filtered or unexported fields
}

WebRegistry is a registry for web adapters.

func NewWebRegistry ¶

func NewWebRegistry() *WebRegistry

NewWebRegistry creates a new web adapter registry.

func (*WebRegistry) Create ¶

func (r *WebRegistry) Create(scraper WebScraper) (WebAdapter, error)

Create creates a web adapter instance.

func (*WebRegistry) Register ¶

func (r *WebRegistry) Register(scraper WebScraper, factory AdapterFactory)

Register registers a web adapter factory.

type WebScraper ¶

type WebScraper string

WebScraper represents a web scraping engine identifier.

func (WebScraper) String ¶

func (e WebScraper) String() string

String implements the fmt.Stringer interface.

type WebStrategy ¶

type WebStrategy interface {
	// Execute executes the web scraping strategy for a single URL.
	Execute(ctx context.Context, url string, options *ScrapeOptions) (*WebContent, error)

	// ExecuteMultiple executes the web scraping strategy for multiple URLs.
	ExecuteMultiple(ctx context.Context, urls []string, options *ScrapeOptions) ([]*WebContent, error)
}

WebStrategy defines the interface for a web scraping strategy.

func NewDefaultWebStrategyFromConfig ¶

func NewDefaultWebStrategyFromConfig() (WebStrategy, error)

NewDefaultWebStrategyFromConfig creates a default web scraping strategy from the global configuration.

type WebStrategyConfig ¶

type WebStrategyConfig struct {
	// DefaultScraper is the default scraper to use if no list is specified.
	DefaultScraper WebScraper `json:"default_scraper,omitempty"`

	// DefaultFallbackOrder is the default order of scrapers to try.
	DefaultFallbackOrder []WebScraper `json:"default_fallback_order,omitempty"`

	// EnableFallback determines whether to try the next scraper on failure.
	EnableFallback bool `json:"enable_fallback"`

	// FailFast determines whether to stop immediately after the first failure.
	FailFast bool `json:"fail_fast"`
}

WebStrategyConfig holds the configuration for a web scraping strategy.

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
firecrawl
jina

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL