model

package
v0.17.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 3, 2026 License: GPL-3.0 Imports: 9 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AggregateRating

// AggregateRating mirrors the schema.org AggregateRating type.
// Every field is tagged omitempty, so a zero value (including a genuine
// rating or count of 0) is left out of the encoded JSON.
type AggregateRating struct {
	ReviewCount int     `json:"reviewCount,omitempty"` // schema.org reviewCount: total number of reviews
	RatingCount int     `json:"ratingCount,omitempty"` // schema.org ratingCount: total number of ratings
	RatingValue float64 `json:"ratingValue,omitempty"` // schema.org ratingValue: the rating itself
	BestRating  int     `json:"bestRating,omitempty"`  // schema.org bestRating: highest value allowed in the rating system
	WorstRating int     `json:"worstRating,omitempty"` // schema.org worstRating: lowest value allowed in the rating system
}

AggregateRating represents the average rating based on multiple ratings or reviews. See https://schema.org/AggregateRating

type Allergen added in v0.15.3

// Allergen describes one allergen entry. It is referenced from both
// Recipe.Allergens and PropertyValue.Allergens.
//
// TracesOf is tagged omitempty, so false is never written to JSON.
// NOTE(review): TracesOf presumably flags "may contain traces of" rather than
// a definite ingredient — confirm with the code that sets it.
type Allergen struct {
	Name        string `json:"name,omitempty"`
	Description string `json:"description,omitempty"`
	TracesOf    bool   `json:"tracesOf,omitempty"`
}

type DataInput

// DataInput bundles everything a scraper function receives as input.
//
// Url and Text carry no json tag, so encoding/json writes them under their Go
// names ("Url" and "Text"). The remaining fields are tagged `json:"-"` and
// never appear in encoded output.
// NOTE(review): RootNode, Document and Schemas look like parsed views of the
// same page (HTML tree, goquery document, microdata) — confirm who fills them.
type DataInput struct {
	Url            string
	Text           string
	RootNode       *html.Node           `json:"-"`
	Document       *goquery.Document    `json:"-"`
	Schemas        *microdata.Microdata `json:"-"`
	RequestOptions RequestOptions       `json:"-"`
}

DataInput represents the input data for the scraper

type DiscoveredFeed added in v0.17.0

// DiscoveredFeed records how a feed was found so that the result can be
// serialized and reused later.
//
// Source has no omitempty and is therefore always encoded. The other fields
// are omitted when empty, which for ConfidenceScore includes a score of 0.0.
type DiscoveredFeed struct {
	// Source describes how the feed was discovered.
	Source DiscoverySource `json:"source"`
	// Selector holds the source-specific locator:
	// - "rss-link": the absolute RSS/Atom feed URL
	// - "sitemap": the sitemap URL that was fetched
	// - "dom-container": the structural CSS-path key of the container containing recipe links
	Selector string `json:"selector,omitempty"`
	// UrlPattern is the common path prefix shared by discovered recipe URLs, e.g. "/recipes/".
	UrlPattern string `json:"urlPattern,omitempty"`
	// ConfidenceScore is the scoring result for dom-container source (0.0–1.0).
	ConfidenceScore float64 `json:"confidenceScore,omitempty"`
}

DiscoveredFeed holds the result of automatic feed discovery. Serialize and pass back via FeedOptions.Discovered to skip re-discovery.

type DiscoverySource added in v0.17.0

// DiscoverySource names the discovery method as a plain string.
// The values mentioned alongside DiscoveredFeed.Selector are "rss-link",
// "sitemap" and "dom-container".
// NOTE(review): no exported constants for these values are visible here —
// confirm how callers are expected to compare against them.
type DiscoverySource string

DiscoverySource identifies the method used to discover a feed.

type Feed added in v0.9.0

// Feed is a collection of recipes plus descriptive metadata (name, URL,
// description, language, images, publisher).
//
// All fields use omitempty. Note the JSON names that differ from the Go
// names: Language is encoded as "inLanguage" and Images as "image".
type Feed struct {
	Name        string         `json:"name,omitempty"`
	Url         string         `json:"url,omitempty"`
	Description string         `json:"description,omitempty"`
	Language    string         `json:"inLanguage,omitempty"`
	Images      []*ImageObject `json:"image,omitempty"`
	Publisher   *Organization  `json:"publisher,omitempty"`
	Entries     []*Recipe      `json:"entries,omitempty"`
	// Discovered is populated when the universal discovery strategy was used.
	// Persist and pass back via FeedOptions.Discovered for faster subsequent calls.
	Discovered *DiscoveredFeed `json:"discovered,omitempty"`
}

Feed represents a list of recipes found on a page or in a feed

func (*Feed) AddEntry added in v0.9.0

// AddEntry adds entry to the feed if it does not already exist.
// NOTE(review): the bool result presumably reports whether the entry was
// added; the body is not shown here — confirm.
func (f *Feed) AddEntry(entry *Recipe) bool

AddEntry adds a recipe to the feed if it does not already exist

func (*Feed) String added in v0.9.0

// String returns a textual form of the feed; with this signature *Feed
// satisfies fmt.Stringer. The exact output format is not shown here.
func (f *Feed) String() string

type FeedOptions added in v0.9.0

// FeedOptions configures feed scraping. It embeds ScrapeOptions, so every
// recipe-level option is also settable here.
//
// With the zero value no Skip* flag is set, discovery sampling is disabled and
// no previous discovery result is reused.
type FeedOptions struct {
	ScrapeOptions

	// When true, skip parsing feed meta tags and rely on other sources.
	SkipFeedMeta bool
	// When true, skip parsing RSS feeds and rely on other sources.
	SkipRSSScraper bool
	// When true, only the feed will be scraped without scraping each entry separately. Useful for quick runs.
	SkipEntriesScrape bool
	// When true, skip the universal discovery strategy.
	SkipDiscoveryScraper bool
	// AllowDiscoverySampling fetches 2–3 candidate URLs to confirm they are recipe pages.
	// Disabled by default; enabling adds extra HTTP requests but validates low-confidence results.
	AllowDiscoverySampling bool
	// Discovered, if non-nil, skips discovery and reuses previously discovered container/feed.
	// Populate from a previously returned Feed.Discovered value.
	Discovered *DiscoveredFeed
}

FeedOptions holds the options for feed scraping.

type FeedScraper added in v0.9.0

// FeedScraper is a type alias (note the "="), not a new named type: any
// func(*DataInput, *Feed) error can be used where a FeedScraper is expected.
// NOTE(review): the function presumably fills feed in place and reports
// failure through the returned error — confirm against the implementations.
type FeedScraper = func(data *DataInput, feed *Feed) error

FeedScraper defines a function that fills a feed from the input data and reports failure through the returned error

type HTTPClient added in v0.16.1

// HTTPClient abstracts the HTTP client stored in RequestOptions.HttpClient.
// The three methods have the same signatures as the matching methods on
// *http.Client, so a *http.Client can be assigned directly.
type HTTPClient interface {
	Do(req *http.Request) (*http.Response, error)
	Get(url string) (*http.Response, error)
	Post(url, contentType string, body io.Reader) (*http.Response, error)
}

HTTPClient is an interface that allows http.Client or safeurl.Client to be used interchangeably.

type HowToSection

// HowToSection groups instruction steps.
// Because HowToStep is embedded without a tag, encoding/json writes its fields
// (name, text, url, image, video) at the same level as itemListElement rather
// than in a nested object.
type HowToSection struct {
	HowToStep              // because it's optional to have a group, we have to embed `HowToStep` here
	Steps     []*HowToStep `json:"itemListElement,omitempty"`
}

HowToSection is a group of steps in the instructions. See https://schema.org/HowToSection

type HowToStep

// HowToStep is a single instruction step. Every field is a plain string with
// omitempty, so unset fields are left out of the JSON.
// NOTE(review): Image and Video are presumably URLs rather than embedded
// objects — confirm.
type HowToStep struct {
	Name  string `json:"name,omitempty"`
	Text  string `json:"text,omitempty"`
	Url   string `json:"url,omitempty"`
	Image string `json:"image,omitempty"`
	Video string `json:"video,omitempty"`
}

HowToStep is a step in the instructions. See https://schema.org/HowToStep

type HowToTool added in v0.15.0

// HowToTool describes one piece of equipment; Recipe.Equipment holds a list
// of them. Quantity is encoded under the JSON name "requiredQuantity" and is
// kept as a free-form string.
type HowToTool struct {
	Name        string `json:"name,omitempty"`
	Description string `json:"description,omitempty"`
	Url         string `json:"url,omitempty"`
	Image       string `json:"image,omitempty"`
	Quantity    string `json:"requiredQuantity,omitempty"`
}

HowToTool represents a tool used in the instructions https://schema.org/HowToTool

type ImageObject

// ImageObject describes one image; Recipe.Images and Feed.Images hold lists
// of them. All fields use omitempty, so a zero Width or Height is not encoded.
// NOTE(review): Width and Height are presumably in pixels — confirm.
type ImageObject struct {
	Url     string `json:"url,omitempty"`
	Width   int    `json:"width,omitempty"`
	Height  int    `json:"height,omitempty"`
	Caption string `json:"caption,omitempty"`
}

ImageObject represents an image object https://schema.org/ImageObject

type NutritionInformation

// NutritionInformation holds the nutrition facts of a recipe.
//
// Every numeric field is a *float64 with omitempty: a nil pointer is omitted
// from the JSON, while a pointer to 0 is still encoded. This keeps "unknown"
// distinguishable from an explicit zero amount.
type NutritionInformation struct {
	ServingSize           string   `json:"servingSize,omitempty"`           // The serving size, in terms of the number of volume or mass.
	Calories              *float64 `json:"calories,omitempty"`              // The number of calories.
	CarbohydrateContent   *float64 `json:"carbohydrateContent,omitempty"`   // The number of grams of carbohydrates.
	CholesterolContent    *float64 `json:"cholesterolContent,omitempty"`    // The number of milligrams of cholesterol.
	FatContent            *float64 `json:"fatContent,omitempty"`            // The number of grams of fat.
	FiberContent          *float64 `json:"fiberContent,omitempty"`          // The number of grams of fiber.
	ProteinContent        *float64 `json:"proteinContent,omitempty"`        // The number of grams of protein.
	SaturatedFatContent   *float64 `json:"saturatedFatContent,omitempty"`   // The number of grams of saturated fat.
	SodiumContent         *float64 `json:"sodiumContent,omitempty"`         // The number of milligrams of sodium.
	SugarContent          *float64 `json:"sugarContent,omitempty"`          // The number of grams of sugar.
	TransFatContent       *float64 `json:"transFatContent,omitempty"`       // The number of grams of trans fat.
	UnsaturatedFatContent *float64 `json:"unsaturatedFatContent,omitempty"` // The number of grams of unsaturated fat.
	// other minerals commonly found in recipes, not covered by schema.org
	SaltContent      *float64 `json:"saltContent,omitempty"`      // The number of grams of salt.
	IronContent      *float64 `json:"ironContent,omitempty"`      // The number of milligrams of iron.
	PotassiumContent *float64 `json:"potassiumContent,omitempty"` // The number of milligrams of potassium.
	CalciumContent   *float64 `json:"calciumContent,omitempty"`   // The number of milligrams of calcium.
}

NutritionInformation according to https://schema.org/NutritionInformation

type Organization

// Organization carries the name, description and URL of an organization.
// It is the type of the Publisher field on both Recipe and Feed.
type Organization struct {
	Name        string `json:"name,omitempty"`
	Description string `json:"description,omitempty"`
	Url         string `json:"url,omitempty"`
}

Organization according to https://schema.org/Organization

type Person

// Person describes an individual; it is the type of Recipe.Author.
// All fields use omitempty, so an empty KnowsAbout slice is not encoded.
// NOTE(review): Image is presumably a URL string — confirm.
type Person struct {
	Name        string   `json:"name,omitempty"`
	JobTitle    string   `json:"jobTitle,omitempty"`
	Description string   `json:"description,omitempty"`
	KnowsAbout  []string `json:"knowsAbout,omitempty"`
	Url         string   `json:"url,omitempty"`
	Image       string   `json:"image,omitempty"`
}

Person according to https://schema.org/Person

type PropertyValue added in v0.15.0

// PropertyValue is the element type of Recipe.Ingredients.
//
// Quantities (Value, MinValue, MaxValue) are kept as strings so that forms
// such as "1/2" or "a pinch" survive unchanged. The trailing fields
// (Category, Pantry, EstimatedCost, Allergens) are extensions outside
// schema.org.
// NOTE(review): Pantry presumably marks a staple that is usually already at
// home — confirm.
type PropertyValue struct {
	Value       string `json:"value,omitempty"` // The quantitative value of the property, e.g. "2", "1/2", "a pinch"
	MaxValue    string `json:"maxValue,omitempty"`
	MinValue    string `json:"minValue,omitempty"`
	UnitText    string `json:"unitText,omitempty"` // The unit of measurement, e.g. "g", "cup", "teaspoon"
	UnitCode    string `json:"unitCode,omitempty"`
	Name        string `json:"name,omitempty"` // The name of the property, e.g. "sugar", "flour", "salt"
	Image       string `json:"image,omitempty"`
	Url         string `json:"url,omitempty"`
	Description string `json:"description,omitempty"`
	// extra fields not covered by schema.org
	Category      string      `json:"category,omitempty"`
	Pantry        bool        `json:"pantry,omitempty"`
	EstimatedCost string      `json:"estimatedCost,omitempty"`
	Allergens     []*Allergen `json:"allergens,omitempty"`
}

PropertyValue represents a property-value pair, e.g. an ingredient and its amount https://schema.org/PropertyValue

type Recipe

// Recipe is the central data type of the package, encoded with schema.org
// Recipe property names.
//
// Several Go names differ from their JSON names: Difficulty is encoded as
// "educationalLevel", Equipment as "tool", Links as "sameAs", Language as
// "inLanguage" and Images as "image". All fields use omitempty; the two date
// fields are pointers, so a nil date is omitted entirely.
// NOTE(review): PrepTime, CookTime and TotalTime are plain strings and
// presumably hold ISO 8601 durations as in schema.org — confirm.
type Recipe struct {
	Url           string                `json:"url,omitempty"`
	Name          string                `json:"name,omitempty"`
	Description   string                `json:"description,omitempty"`
	Language      string                `json:"inLanguage,omitempty"`
	Images        []*ImageObject        `json:"image,omitempty"`
	Author        *Person               `json:"author,omitempty"`
	Publisher     *Organization         `json:"publisher,omitempty"`
	Text          string                `json:"text,omitempty"`
	PrepTime      string                `json:"prepTime,omitempty"`
	CookTime      string                `json:"cookTime,omitempty"` // alias `performTime`
	TotalTime     string                `json:"totalTime,omitempty"`
	Difficulty    string                `json:"educationalLevel,omitempty"` // `difficulty` is not a part of Recipe schema https://github.com/schemaorg/schemaorg/issues/3130
	CookingMethod string                `json:"cookingMethod,omitempty"`
	Diets         []string              `json:"suitableForDiet,omitempty"`
	Categories    []string              `json:"recipeCategory,omitempty"`
	Cuisines      []string              `json:"recipeCuisine,omitempty"`
	Keywords      []string              `json:"keywords,omitempty"`
	Yield         string                `json:"recipeYield,omitempty"`        // alias `yield`
	Ingredients   []*PropertyValue      `json:"recipeIngredient,omitempty"`   // alias `supply`
	Equipment     []*HowToTool          `json:"tool,omitempty"`               // alias `tool`, `recipeEquipment` is not a part of Recipe schema https://github.com/schemaorg/schemaorg/issues/3132
	Instructions  []*HowToSection       `json:"recipeInstructions,omitempty"` // alias `step`
	Nutrition     *NutritionInformation `json:"nutrition,omitempty"`
	Rating        *AggregateRating      `json:"aggregateRating,omitempty"`
	CommentCount  int                   `json:"commentCount,omitempty"`
	Video         *VideoObject          `json:"video,omitempty"`
	Links         []string              `json:"sameAs,omitempty"` // maybe not the cleanest name, but we can store additional links here
	EstimatedCost string                `json:"estimatedCost,omitempty"`
	Allergens     []*Allergen           `json:"allergens,omitempty"` // not a part of Recipe schema
	DateModified  *time.Time            `json:"dateModified,omitempty"`
	DatePublished *time.Time            `json:"datePublished,omitempty"`
}

Recipe is the basic struct for the recipe; see https://schema.org/Recipe. Perhaps I would rename recipeYield, recipeIngredient and recipeInstructions to their aliases, but many websites expect only these names (like Google Search, https://developers.google.com/search/docs/appearance/structured-data/recipe).

func (*Recipe) AddImage

// AddImage takes an *ImageObject and returns nothing.
// NOTE(review): presumably adds image to r.Images; the body is not shown
// here, so nil and duplicate handling are unknown — confirm.
func (r *Recipe) AddImage(image *ImageObject)

func (*Recipe) AddImageUrl

// AddImageUrl is the string-URL counterpart of AddImage.
// NOTE(review): presumably wraps imageUrl in an ImageObject and adds it to
// r.Images; the body is not shown here — confirm.
func (r *Recipe) AddImageUrl(imageUrl string)

func (*Recipe) IsValid added in v0.9.0

// IsValid reports a yes/no validity verdict for the recipe.
// NOTE(review): the criteria are not shown here, and neither is the relation
// to Validate, which takes an explicit RecipeFilter — confirm both.
func (r *Recipe) IsValid() bool

func (*Recipe) String

// String returns a textual form of the recipe; with this signature *Recipe
// satisfies fmt.Stringer. The exact output format is not shown here.
func (r *Recipe) String() string

func (*Recipe) Validate added in v0.17.0

// Validate checks the recipe against filter and reports the outcome as an
// error value.
// NOTE(review): presumably returns nil when the recipe is acceptable and a
// descriptive error otherwise; the body is not shown here — confirm.
func (r *Recipe) Validate(filter RecipeFilter) error

type RecipeFilter added in v0.17.0

// RecipeFilter lists the relaxations and extra requirements applied when a
// recipe is validated. It is embedded in ScrapeOptions and accepted by
// Recipe.Validate.
//
// With the zero value none of the Optional* relaxations is enabled.
// NOTE(review): a zero MinIngredients presumably means "no minimum" — confirm.
type RecipeFilter struct {
	// Whether to accept recipes without images.
	OptionalImage bool
	// Whether to accept recipes without a publisher.
	OptionalPublisher bool
	// Whether to accept recipes without ingredients.
	OptionalIngredients bool
	// When set, recipes with fewer than these many ingredients are rejected.
	// Useful to skip prepared/frozen food recipes, which usually have very few ingredients.
	MinIngredients int
	// Whether to accept recipes without instructions.
	OptionalInstructions bool
}

RecipeFilter holds optional criteria for filtering recipes beyond basic validity.

type RequestOptions added in v0.16.0

// RequestOptions carries the HTTP settings shared by all requests of a scrape
// session. Every field is optional: the zero value falls back to
// context.Background(), the default headers and the default client described
// on the fields below.
type RequestOptions struct {
	// Context for cancellation. If nil, context.Background() is used.
	Context context.Context
	// Headers to merge with defaults. Custom values take priority.
	Headers http.Header
	// HttpClient to use. If nil, a default 30s-timeout client is used.
	HttpClient HTTPClient
}

RequestOptions holds reusable HTTP configuration for all requests in a scrape session.

type ScrapeOptions added in v0.16.0

// ScrapeOptions configures scraping of a single recipe.
// It embeds RequestOptions and RecipeFilter, so their fields (Context,
// Headers, HttpClient, OptionalImage, MinIngredients, ...) are promoted and
// can be set directly on a ScrapeOptions value. With the zero value no Skip*
// flag is set.
type ScrapeOptions struct {
	RequestOptions
	RecipeFilter

	// When true, skip scraping the URL from meta tags and rely on the input URL.
	SkipMetaUrl bool
	// When true, skip parsing microdata and rely on other sources. It is a main source of data.
	SkipMicrodata bool
	// When true, skip parsing schemas and rely on other sources.
	SkipSchemaScraper bool
	// When true, skip parsing opengraph and rely on other sources.
	SkipOpenGraphScraper bool
	// When true, skip parsing custom scrapers and rely on other sources.
	SkipCustomScrapers bool
}

ScrapeOptions holds the options for scraping a recipe.

type Scraper

// Scraper is a type alias (note the "="), not a new named type: any
// func(*DataInput, *Recipe) error can be used where a Scraper is expected.
// NOTE(review): the function presumably fills recipe in place and reports
// failure through the returned error — confirm against the implementations.
type Scraper = func(data *DataInput, recipe *Recipe) error

Scraper defines a function that fills a recipe from the input data

type VideoObject

// VideoObject describes a video attached to a recipe (see Recipe.Video).
// All fields use omitempty; UploadDate is a pointer, so a nil date is omitted
// from the JSON.
// NOTE(review): Duration is a plain string and presumably holds an ISO 8601
// duration as in schema.org — confirm.
type VideoObject struct {
	Name         string     `json:"name,omitempty"`
	Description  string     `json:"description,omitempty"`
	Duration     string     `json:"duration,omitempty"`
	EmbedUrl     string     `json:"embedUrl,omitempty"`
	ContentUrl   string     `json:"contentUrl,omitempty"`
	ThumbnailUrl string     `json:"thumbnailUrl,omitempty"`
	UploadDate   *time.Time `json:"uploadDate,omitempty"`
}

VideoObject represents a video object https://schema.org/VideoObject

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL