Documentation
¶
Index ¶
- type AggregateRating
- type Allergen
- type DataInput
- type DiscoveredFeed
- type DiscoverySource
- type Feed
- type FeedOptions
- type FeedScraper
- type HTTPClient
- type HowToSection
- type HowToStep
- type HowToTool
- type ImageObject
- type NutritionInformation
- type Organization
- type Person
- type PropertyValue
- type Recipe
- type RecipeFilter
- type RequestOptions
- type ScrapeOptions
- type Scraper
- type VideoObject
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AggregateRating ¶
// AggregateRating represents the average rating based on multiple ratings or
// reviews (https://schema.org/AggregateRating).
//
// Every field is tagged omitempty, so a zero count or a zero rating value is
// left out of the JSON output rather than emitted as 0.
type AggregateRating struct {
ReviewCount int `json:"reviewCount,omitempty"`
RatingCount int `json:"ratingCount,omitempty"`
// RatingValue is a float64, while the best/worst bounds below are whole numbers.
RatingValue float64 `json:"ratingValue,omitempty"`
BestRating int `json:"bestRating,omitempty"`
WorstRating int `json:"worstRating,omitempty"`
}
AggregateRating represents the average rating based on multiple ratings or reviews https://schema.org/AggregateRating
type DataInput ¶
// DataInput represents the input data for the scraper.
type DataInput struct {
// Url and Text carry no json tag, so encoding/json uses the Go field names
// ("Url", "Text") as keys.
Url string
Text string
// The remaining fields are tagged `json:"-"` and are never serialized.
RootNode *html.Node `json:"-"`
Document *goquery.Document `json:"-"`
Schemas *microdata.Microdata `json:"-"`
// RequestOptions is a named field here, not an embedded struct, so its
// members are reached as input.RequestOptions.X.
RequestOptions RequestOptions `json:"-"`
}
DataInput represents the input data for the scraper
type DiscoveredFeed ¶ added in v0.17.0
// DiscoveredFeed holds the result of automatic feed discovery. It carries
// JSON tags on every field so it can be serialized and later passed back via
// FeedOptions.Discovered to skip re-discovery.
type DiscoveredFeed struct {
// Source describes how the feed was discovered.
// It has no omitempty, so the "source" key is always present in the JSON output.
Source DiscoverySource `json:"source"`
// Selector holds the source-specific locator:
// - "rss-link": the absolute RSS/Atom feed URL
// - "sitemap": the sitemap URL that was fetched
// - "dom-container": the structural CSS-path key of the container containing recipe links
Selector string `json:"selector,omitempty"`
// UrlPattern is the common path prefix shared by discovered recipe URLs, e.g. "/recipes/".
UrlPattern string `json:"urlPattern,omitempty"`
// ConfidenceScore is the scoring result for dom-container source (0.0–1.0).
// Because of omitempty, a score of exactly 0.0 is omitted from the JSON output.
ConfidenceScore float64 `json:"confidenceScore,omitempty"`
}
DiscoveredFeed holds the result of automatic feed discovery. Serialize and pass back via FeedOptions.Discovered to skip re-discovery.
type DiscoverySource ¶ added in v0.17.0
// DiscoverySource identifies the method used to discover a feed.
// NOTE(review): the DiscoveredFeed.Selector comment names "rss-link",
// "sitemap" and "dom-container"; presumably these are the defined values,
// but the constants are not visible here — confirm.
type DiscoverySource string
DiscoverySource identifies the method used to discover a feed.
type Feed ¶ added in v0.9.0
// Feed represents a list of recipes found on a page or in a feed.
// All fields are tagged omitempty, so empty strings, nil pointers and empty
// slices are left out of the JSON output.
type Feed struct {
Name string `json:"name,omitempty"`
Url string `json:"url,omitempty"`
Description string `json:"description,omitempty"`
// Language is serialized under the key "inLanguage".
Language string `json:"inLanguage,omitempty"`
// Images is serialized under the singular key "image".
Images []*ImageObject `json:"image,omitempty"`
Publisher *Organization `json:"publisher,omitempty"`
// Entries holds the recipes that make up the feed.
Entries []*Recipe `json:"entries,omitempty"`
// Discovered is populated when the universal discovery strategy was used.
// Persist and pass back via FeedOptions.Discovered for faster subsequent calls.
Discovered *DiscoveredFeed `json:"discovered,omitempty"`
}
Feed represents a list of recipes found on a page or in a feed
type FeedOptions ¶ added in v0.9.0
// FeedOptions holds the options for feed scraping. The zero value enables
// every strategy listed below (all Skip* flags are false) and disables
// discovery sampling.
type FeedOptions struct {
// ScrapeOptions is embedded, so its fields are promoted and can be set
// directly on a FeedOptions value.
ScrapeOptions
// When true, skip parsing feed meta tags and rely on other sources.
SkipFeedMeta bool
// When true, skip parsing RSS feeds and rely on other sources.
SkipRSSScraper bool
// When true, only the feed will be scraped without scraping each entry separately. Useful for quick runs.
SkipEntriesScrape bool
// When true, skip the universal discovery strategy.
SkipDiscoveryScraper bool
// AllowDiscoverySampling fetches 2–3 candidate URLs to confirm they are recipe pages.
// Disabled by default; enabling adds extra HTTP requests but validates low-confidence results.
AllowDiscoverySampling bool
// Discovered, if non-nil, skips discovery and reuses previously discovered container/feed.
// Populate from a previously returned Feed.Discovered value.
Discovered *DiscoveredFeed
}
FeedOptions holds the options for feed scraping.
type FeedScraper ¶ added in v0.9.0
FeedScraper defines a function that returns a feed from the input data
type HTTPClient ¶ added in v0.16.1
// HTTPClient is an interface that allows http.Client or safeurl.Client to be
// used interchangeably. Its three methods have the same signatures as the
// corresponding *http.Client methods.
type HTTPClient interface {
// Do sends the prepared request and returns the response.
Do(req *http.Request) (*http.Response, error)
// Get issues a GET request to url.
Get(url string) (*http.Response, error)
// Post issues a POST request to url with the given content type and body.
Post(url, contentType string, body io.Reader) (*http.Response, error)
}
HTTPClient is an interface that allows http.Client or safeurl.Client to be used interchangeably.
type HowToSection ¶
// HowToSection is a group of steps in the instructions
// (https://schema.org/HowToSection).
//
// The embedded HowToStep has no json tag, so encoding/json flattens its
// fields (name, text, url, image, video) into the section's own JSON object.
type HowToSection struct {
HowToStep // because it's optional to have a group, we have to embed `HowToStep` here
// Steps is serialized under the schema.org key "itemListElement".
Steps []*HowToStep `json:"itemListElement,omitempty"`
}
HowToSection is a group of steps in the instructions. See https://schema.org/HowToSection
type HowToStep ¶
// HowToStep is a single step in the instructions (https://schema.org/HowToStep).
// All fields are strings tagged omitempty, so empty values are omitted from
// the JSON output.
type HowToStep struct {
Name string `json:"name,omitempty"`
Text string `json:"text,omitempty"`
Url string `json:"url,omitempty"`
// Image and Video are plain strings here, not ImageObject/VideoObject values.
Image string `json:"image,omitempty"`
Video string `json:"video,omitempty"`
}
HowToStep is a single step in the instructions. See https://schema.org/HowToStep
type HowToTool ¶ added in v0.15.0
// HowToTool represents a tool used in the instructions
// (https://schema.org/HowToTool).
type HowToTool struct {
Name string `json:"name,omitempty"`
Description string `json:"description,omitempty"`
Url string `json:"url,omitempty"`
// Image is a plain string here, not an ImageObject.
Image string `json:"image,omitempty"`
// Quantity is serialized under the schema.org key "requiredQuantity".
Quantity string `json:"requiredQuantity,omitempty"`
}
HowToTool represents a tool used in the instructions https://schema.org/HowToTool
type ImageObject ¶
// ImageObject represents an image object (https://schema.org/ImageObject).
type ImageObject struct {
Url string `json:"url,omitempty"`
// Width and Height are omitted from the JSON output when zero.
// NOTE(review): presumably measured in pixels — the unit is not stated here.
Width int `json:"width,omitempty"`
Height int `json:"height,omitempty"`
Caption string `json:"caption,omitempty"`
}
ImageObject represents an image object https://schema.org/ImageObject
type NutritionInformation ¶
// NutritionInformation follows https://schema.org/NutritionInformation, plus
// a few extra fields that schema.org does not define.
//
// Numeric values are *float64: with omitempty a nil pointer is omitted from
// the JSON output, while a pointer to 0 is still emitted. This keeps
// "unknown" distinguishable from an explicit zero.
type NutritionInformation struct {
ServingSize string `json:"servingSize,omitempty"` // The serving size, expressed as a volume or mass.
Calories *float64 `json:"calories,omitempty"` // The number of calories.
CarbohydrateContent *float64 `json:"carbohydrateContent,omitempty"` // The number of grams of carbohydrates.
CholesterolContent *float64 `json:"cholesterolContent,omitempty"` // The number of milligrams of cholesterol.
FatContent *float64 `json:"fatContent,omitempty"` // The number of grams of fat.
FiberContent *float64 `json:"fiberContent,omitempty"` // The number of grams of fiber.
ProteinContent *float64 `json:"proteinContent,omitempty"` // The number of grams of protein.
SaturatedFatContent *float64 `json:"saturatedFatContent,omitempty"` // The number of grams of saturated fat.
SodiumContent *float64 `json:"sodiumContent,omitempty"` // The number of milligrams of sodium.
SugarContent *float64 `json:"sugarContent,omitempty"` // The number of grams of sugar.
TransFatContent *float64 `json:"transFatContent,omitempty"` // The number of grams of trans fat.
UnsaturatedFatContent *float64 `json:"unsaturatedFatContent,omitempty"` // The number of grams of unsaturated fat.
// Other values commonly found in recipes that are not covered by schema.org.
SaltContent *float64 `json:"saltContent,omitempty"` // The number of grams of salt.
IronContent *float64 `json:"ironContent,omitempty"` // The number of milligrams of iron.
PotassiumContent *float64 `json:"potassiumContent,omitempty"` // The number of milligrams of potassium.
CalciumContent *float64 `json:"calciumContent,omitempty"` // The number of milligrams of calcium.
}
NutritionInformation according to https://schema.org/NutritionInformation
type Organization ¶
// Organization follows https://schema.org/Organization.
// All fields are strings tagged omitempty, so empty values are omitted from
// the JSON output.
type Organization struct {
Name string `json:"name,omitempty"`
Description string `json:"description,omitempty"`
Url string `json:"url,omitempty"`
// Logo is a plain string here, not an ImageObject.
Logo string `json:"logo,omitempty"`
}
Organization according to https://schema.org/Organization
type Person ¶
// Person follows https://schema.org/Person.
// All fields are tagged omitempty, so empty strings and an empty KnowsAbout
// slice are omitted from the JSON output.
type Person struct {
Name string `json:"name,omitempty"`
JobTitle string `json:"jobTitle,omitempty"`
Description string `json:"description,omitempty"`
KnowsAbout []string `json:"knowsAbout,omitempty"`
Url string `json:"url,omitempty"`
// Image is a plain string here, not an ImageObject.
Image string `json:"image,omitempty"`
}
Person according to https://schema.org/Person
type PropertyValue ¶ added in v0.15.0
// PropertyValue represents a property-value pair, e.g. an ingredient and its
// amount (https://schema.org/PropertyValue).
type PropertyValue struct {
// Value, MaxValue and MinValue are strings rather than numbers, which lets
// Value hold non-numeric amounts such as "1/2" or "a pinch".
Value string `json:"value,omitempty"` // The quantitative value of the property, e.g. "2", "1/2", "a pinch"
MaxValue string `json:"maxValue,omitempty"`
MinValue string `json:"minValue,omitempty"`
UnitText string `json:"unitText,omitempty"` // The unit of measurement, e.g. "g", "cup", "teaspoon"
UnitCode string `json:"unitCode,omitempty"`
Name string `json:"name,omitempty"` // The name of the property, e.g. "sugar", "flour", "salt"
Image string `json:"image,omitempty"`
Url string `json:"url,omitempty"`
Description string `json:"description,omitempty"`
// extra fields not covered by schema.org
Category string `json:"category,omitempty"`
// Pantry is omitted from the JSON output when false (omitempty on a bool).
Pantry bool `json:"pantry,omitempty"`
EstimatedCost string `json:"estimatedCost,omitempty"`
Allergens []*Allergen `json:"allergens,omitempty"`
}
PropertyValue represents a property-value pair, e.g. an ingredient and its amount https://schema.org/PropertyValue
type Recipe ¶
// Recipe is the basic struct for a recipe (https://schema.org/Recipe).
//
// JSON keys follow schema.org property names, which is why several Go field
// names differ from their keys (e.g. Yield -> "recipeYield"). Every field is
// tagged omitempty, so empty strings, zero counts, nil pointers and empty
// slices are omitted from the JSON output.
type Recipe struct {
Url string `json:"url,omitempty"`
Name string `json:"name,omitempty"`
Description string `json:"description,omitempty"`
Language string `json:"inLanguage,omitempty"`
Images []*ImageObject `json:"image,omitempty"`
Author *Person `json:"author,omitempty"`
Publisher *Organization `json:"publisher,omitempty"`
Text string `json:"text,omitempty"`
// The three time fields are strings.
// NOTE(review): presumably ISO 8601 durations as in schema.org — format not shown here, confirm.
PrepTime string `json:"prepTime,omitempty"`
CookTime string `json:"cookTime,omitempty"` // alias `performTime`
TotalTime string `json:"totalTime,omitempty"`
Difficulty string `json:"educationalLevel,omitempty"` // `difficulty` is not a part of Recipe schema https://github.com/schemaorg/schemaorg/issues/3130
CookingMethod string `json:"cookingMethod,omitempty"`
Diets []string `json:"suitableForDiet,omitempty"`
Categories []string `json:"recipeCategory,omitempty"`
Cuisines []string `json:"recipeCuisine,omitempty"`
Keywords []string `json:"keywords,omitempty"`
Yield string `json:"recipeYield,omitempty"` // alias `yield`
Ingredients []*PropertyValue `json:"recipeIngredient,omitempty"` // alias `supply`
Equipment []*HowToTool `json:"tool,omitempty"` // alias `tool`, `recipeEquipment` is not a part of Recipe schema https://github.com/schemaorg/schemaorg/issues/3132
// Instructions are always grouped into sections; HowToSection embeds
// HowToStep, so a section can also carry step fields of its own.
Instructions []*HowToSection `json:"recipeInstructions,omitempty"` // alias `step`
Nutrition *NutritionInformation `json:"nutrition,omitempty"`
Rating *AggregateRating `json:"aggregateRating,omitempty"`
CommentCount int `json:"commentCount,omitempty"`
Video *VideoObject `json:"video,omitempty"`
Links []string `json:"sameAs,omitempty"` // maybe not the cleanest name, but we can store additional links here
EstimatedCost string `json:"estimatedCost,omitempty"`
Allergens []*Allergen `json:"allergens,omitempty"` // not a part of Recipe schema
// Dates are pointers so that a missing date (nil) is omitted from the JSON
// output; omitempty would not drop a zero time.Time value.
DateModified *time.Time `json:"dateModified,omitempty"`
DatePublished *time.Time `json:"datePublished,omitempty"`
}
Recipe is the basic struct for a recipe; see https://schema.org/Recipe. The JSON keys recipeYield, recipeIngredient and recipeInstructions could arguably be renamed to their aliases, but many consumers accept only these names (for example Google Search: https://developers.google.com/search/docs/appearance/structured-data/recipe).
func (*Recipe) AddImage ¶
func (r *Recipe) AddImage(image *ImageObject)
func (*Recipe) AddImageUrl ¶
func (*Recipe) Validate ¶ added in v0.17.0
func (r *Recipe) Validate(filter RecipeFilter) error
type RecipeFilter ¶ added in v0.17.0
// RecipeFilter holds optional criteria for filtering recipes beyond basic
// validity. It is the parameter type of (*Recipe).Validate.
//
// All Optional* flags default to false, i.e. the zero value does not accept
// recipes that lack images, a publisher, ingredients or instructions.
type RecipeFilter struct {
// Whether to accept recipes without images.
OptionalImage bool
// Whether to accept recipes without a publisher.
OptionalPublisher bool
// Whether to accept recipes without ingredients.
OptionalIngredients bool
// When set, recipes with fewer than these many ingredients are rejected.
// Useful to skip prepared/frozen food recipes, which usually have very few ingredients.
// NOTE(review): presumably the zero value disables this check — confirm against Validate.
MinIngredients int
// Whether to accept recipes without instructions.
OptionalInstructions bool
}
RecipeFilter holds optional criteria for filtering recipes beyond basic validity.
type RequestOptions ¶ added in v0.16.0
// RequestOptions holds reusable HTTP configuration for all requests in a
// scrape session. Every field is optional: nil values fall back to the
// defaults described on each field.
type RequestOptions struct {
// Context for cancellation. If nil, context.Background() is used.
Context context.Context
// Headers to merge with defaults. Custom values take priority.
Headers http.Header
// HttpClient to use. If nil, a default 30s-timeout client is used.
// Any value satisfying the HTTPClient interface (Do, Get, Post) is accepted.
HttpClient HTTPClient
}
RequestOptions holds reusable HTTP configuration for all requests in a scrape session.
type ScrapeOptions ¶ added in v0.16.0
// ScrapeOptions holds the options for scraping a recipe. With the zero value
// no source is skipped (all Skip* flags are false).
type ScrapeOptions struct {
// RequestOptions and RecipeFilter are embedded, so their fields are promoted
// and can be set directly on a ScrapeOptions value.
RequestOptions
RecipeFilter
// When true, skip scraping the URL from meta tags and rely on the input URL.
SkipMetaUrl bool
// When true, skip parsing microdata and rely on other sources. It is a main source of data.
SkipMicrodata bool
// When true, skip parsing schemas and rely on other sources.
SkipSchemaScraper bool
// When true, skip parsing opengraph and rely on other sources.
SkipOpenGraphScraper bool
// When true, skip parsing custom scrapers and rely on other sources.
SkipCustomScrapers bool
}
ScrapeOptions holds the options for scraping a recipe.
type VideoObject ¶
// VideoObject represents a video object (https://schema.org/VideoObject).
type VideoObject struct {
Name string `json:"name,omitempty"`
Description string `json:"description,omitempty"`
// Duration is kept as a string.
// NOTE(review): presumably an ISO 8601 duration as in schema.org — confirm.
Duration string `json:"duration,omitempty"`
EmbedUrl string `json:"embedUrl,omitempty"`
ContentUrl string `json:"contentUrl,omitempty"`
ThumbnailUrl string `json:"thumbnailUrl,omitempty"`
// UploadDate is a pointer so that a missing date (nil) is omitted from the
// JSON output.
UploadDate *time.Time `json:"uploadDate,omitempty"`
}
VideoObject represents a video object https://schema.org/VideoObject